/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | neon-p5-x20-acc5.c | 69 float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vx0123, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5() 70 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vx4567, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5() 71 float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5() 72 float32x4_t vnCDEF = vmlaq_f32(vmagic_bias, vxCDEF, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5() 73 float32x4_t vnGHIJ = vmlaq_f32(vmagic_bias, vxGHIJ, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5() 92 float32x4_t vt0123 = vmlaq_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5() 93 float32x4_t vt4567 = vmlaq_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5() 94 float32x4_t vt89AB = vmlaq_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5() 95 float32x4_t vtCDEF = vmlaq_f32(vxCDEF, vnCDEF, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5() 96 float32x4_t vtGHIJ = vmlaq_f32(vxGHIJ, vnGHIJ, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5() [all …]
|
D | neon-p5-x20.c | 65 float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vx0123, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20() 66 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vx4567, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20() 67 float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20() 68 float32x4_t vnCDEF = vmlaq_f32(vmagic_bias, vxCDEF, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20() 69 float32x4_t vnGHIJ = vmlaq_f32(vmagic_bias, vxGHIJ, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20() 88 float32x4_t vt0123 = vmlaq_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20() 89 float32x4_t vt4567 = vmlaq_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20() 90 float32x4_t vt89AB = vmlaq_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20() 91 float32x4_t vtCDEF = vmlaq_f32(vxCDEF, vnCDEF, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20() 92 float32x4_t vtGHIJ = vmlaq_f32(vxGHIJ, vnGHIJ, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20() [all …]
|
D | neon-p5-x20-acc2.c | 66 float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vx0123, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc2() 67 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vx4567, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc2() 68 float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc2() 69 float32x4_t vnCDEF = vmlaq_f32(vmagic_bias, vxCDEF, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc2() 70 float32x4_t vnGHIJ = vmlaq_f32(vmagic_bias, vxGHIJ, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc2() 89 float32x4_t vt0123 = vmlaq_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc2() 90 float32x4_t vt4567 = vmlaq_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc2() 91 float32x4_t vt89AB = vmlaq_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc2() 92 float32x4_t vtCDEF = vmlaq_f32(vxCDEF, vnCDEF, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc2() 93 float32x4_t vtGHIJ = vmlaq_f32(vxGHIJ, vnGHIJ, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc2() [all …]
|
D | neon-p5-x16.c | 63 float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vx0123, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16() 64 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vx4567, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16() 65 float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16() 66 float32x4_t vnCDEF = vmlaq_f32(vmagic_bias, vxCDEF, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16() 83 float32x4_t vt0123 = vmlaq_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16() 84 float32x4_t vt4567 = vmlaq_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16() 85 float32x4_t vt89AB = vmlaq_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16() 86 float32x4_t vtCDEF = vmlaq_f32(vxCDEF, vnCDEF, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16() 88 vt0123 = vmlaq_f32(vt0123, vn0123, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16() 89 vt4567 = vmlaq_f32(vt4567, vn4567, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16() [all …]
|
D | neon-p5-x16-acc2.c | 64 float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vx0123, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc2() 65 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vx4567, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc2() 66 float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc2() 67 float32x4_t vnCDEF = vmlaq_f32(vmagic_bias, vxCDEF, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc2() 84 float32x4_t vt0123 = vmlaq_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc2() 85 float32x4_t vt4567 = vmlaq_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc2() 86 float32x4_t vt89AB = vmlaq_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc2() 87 float32x4_t vtCDEF = vmlaq_f32(vxCDEF, vnCDEF, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc2() 89 vt0123 = vmlaq_f32(vt0123, vn0123, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc2() 90 vt4567 = vmlaq_f32(vt4567, vn4567, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc2() [all …]
|
D | neon-p5-x16-acc4.c | 66 float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vx0123, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc4() 67 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vx4567, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc4() 68 float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc4() 69 float32x4_t vnCDEF = vmlaq_f32(vmagic_bias, vxCDEF, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc4() 86 float32x4_t vt0123 = vmlaq_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc4() 87 float32x4_t vt4567 = vmlaq_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc4() 88 float32x4_t vt89AB = vmlaq_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc4() 89 float32x4_t vtCDEF = vmlaq_f32(vxCDEF, vnCDEF, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc4() 91 vt0123 = vmlaq_f32(vt0123, vn0123, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc4() 92 vt4567 = vmlaq_f32(vt4567, vn4567, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc4() [all …]
|
D | neon-p5-x12-acc2.c | 62 float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vx0123, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2() 63 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vx4567, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2() 64 float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2() 79 float32x4_t vt0123 = vmlaq_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2() 80 float32x4_t vt4567 = vmlaq_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2() 81 float32x4_t vt89AB = vmlaq_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2() 83 vt0123 = vmlaq_f32(vt0123, vn0123, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2() 84 vt4567 = vmlaq_f32(vt4567, vn4567, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2() 85 vt89AB = vmlaq_f32(vt89AB, vn89AB, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2() 88 float32x4_t vp0123 = vmlaq_f32(vc4, vc5, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2() [all …]
|
D | neon-p5-x12.c | 61 float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vx0123, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12() 62 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vx4567, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12() 63 float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12() 78 float32x4_t vt0123 = vmlaq_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12() 79 float32x4_t vt4567 = vmlaq_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12() 80 float32x4_t vt89AB = vmlaq_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12() 82 vt0123 = vmlaq_f32(vt0123, vn0123, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12() 83 vt4567 = vmlaq_f32(vt4567, vn4567, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12() 84 vt89AB = vmlaq_f32(vt89AB, vn89AB, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12() 87 float32x4_t vp0123 = vmlaq_f32(vc4, vc5, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12() [all …]
|
D | neon-p5-x12-acc3.c | 63 float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vx0123, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc3() 64 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vx4567, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc3() 65 float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc3() 80 float32x4_t vt0123 = vmlaq_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc3() 81 float32x4_t vt4567 = vmlaq_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc3() 82 float32x4_t vt89AB = vmlaq_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc3() 84 vt0123 = vmlaq_f32(vt0123, vn0123, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc3() 85 vt4567 = vmlaq_f32(vt4567, vn4567, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc3() 86 vt89AB = vmlaq_f32(vt89AB, vn89AB, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc3() 89 float32x4_t vp0123 = vmlaq_f32(vc4, vc5, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc3() [all …]
|
/external/XNNPACK/src/f32-velu/gen/ |
D | velu-neon-rr2-p6-x24.c | 60 float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vz0123, vlog2e); in xnn_f32_velu_ukernel__neon_rr2_p6_x24() 61 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vz4567, vlog2e); in xnn_f32_velu_ukernel__neon_rr2_p6_x24() 62 float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vz89AB, vlog2e); in xnn_f32_velu_ukernel__neon_rr2_p6_x24() 63 float32x4_t vnCDEF = vmlaq_f32(vmagic_bias, vzCDEF, vlog2e); in xnn_f32_velu_ukernel__neon_rr2_p6_x24() 64 float32x4_t vnGHIJ = vmlaq_f32(vmagic_bias, vzGHIJ, vlog2e); in xnn_f32_velu_ukernel__neon_rr2_p6_x24() 65 float32x4_t vnKLMN = vmlaq_f32(vmagic_bias, vzKLMN, vlog2e); in xnn_f32_velu_ukernel__neon_rr2_p6_x24() 80 float32x4_t vt0123 = vmlaq_f32(vz0123, vn0123, vminus_ln2_hi); in xnn_f32_velu_ukernel__neon_rr2_p6_x24() 81 float32x4_t vt4567 = vmlaq_f32(vz4567, vn4567, vminus_ln2_hi); in xnn_f32_velu_ukernel__neon_rr2_p6_x24() 82 float32x4_t vt89AB = vmlaq_f32(vz89AB, vn89AB, vminus_ln2_hi); in xnn_f32_velu_ukernel__neon_rr2_p6_x24() 83 float32x4_t vtCDEF = vmlaq_f32(vzCDEF, vnCDEF, vminus_ln2_hi); in xnn_f32_velu_ukernel__neon_rr2_p6_x24() [all …]
|
D | velu-neon-rr2-p6-x20.c | 58 float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vz0123, vlog2e); in xnn_f32_velu_ukernel__neon_rr2_p6_x20() 59 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vz4567, vlog2e); in xnn_f32_velu_ukernel__neon_rr2_p6_x20() 60 float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vz89AB, vlog2e); in xnn_f32_velu_ukernel__neon_rr2_p6_x20() 61 float32x4_t vnCDEF = vmlaq_f32(vmagic_bias, vzCDEF, vlog2e); in xnn_f32_velu_ukernel__neon_rr2_p6_x20() 62 float32x4_t vnGHIJ = vmlaq_f32(vmagic_bias, vzGHIJ, vlog2e); in xnn_f32_velu_ukernel__neon_rr2_p6_x20() 75 float32x4_t vt0123 = vmlaq_f32(vz0123, vn0123, vminus_ln2_hi); in xnn_f32_velu_ukernel__neon_rr2_p6_x20() 76 float32x4_t vt4567 = vmlaq_f32(vz4567, vn4567, vminus_ln2_hi); in xnn_f32_velu_ukernel__neon_rr2_p6_x20() 77 float32x4_t vt89AB = vmlaq_f32(vz89AB, vn89AB, vminus_ln2_hi); in xnn_f32_velu_ukernel__neon_rr2_p6_x20() 78 float32x4_t vtCDEF = vmlaq_f32(vzCDEF, vnCDEF, vminus_ln2_hi); in xnn_f32_velu_ukernel__neon_rr2_p6_x20() 79 float32x4_t vtGHIJ = vmlaq_f32(vzGHIJ, vnGHIJ, vminus_ln2_hi); in xnn_f32_velu_ukernel__neon_rr2_p6_x20() [all …]
|
D | velu-neon-rr2-p6-x16.c | 56 float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vz0123, vlog2e); in xnn_f32_velu_ukernel__neon_rr2_p6_x16() 57 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vz4567, vlog2e); in xnn_f32_velu_ukernel__neon_rr2_p6_x16() 58 float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vz89AB, vlog2e); in xnn_f32_velu_ukernel__neon_rr2_p6_x16() 59 float32x4_t vnCDEF = vmlaq_f32(vmagic_bias, vzCDEF, vlog2e); in xnn_f32_velu_ukernel__neon_rr2_p6_x16() 70 float32x4_t vt0123 = vmlaq_f32(vz0123, vn0123, vminus_ln2_hi); in xnn_f32_velu_ukernel__neon_rr2_p6_x16() 71 float32x4_t vt4567 = vmlaq_f32(vz4567, vn4567, vminus_ln2_hi); in xnn_f32_velu_ukernel__neon_rr2_p6_x16() 72 float32x4_t vt89AB = vmlaq_f32(vz89AB, vn89AB, vminus_ln2_hi); in xnn_f32_velu_ukernel__neon_rr2_p6_x16() 73 float32x4_t vtCDEF = vmlaq_f32(vzCDEF, vnCDEF, vminus_ln2_hi); in xnn_f32_velu_ukernel__neon_rr2_p6_x16() 75 vt0123 = vmlaq_f32(vt0123, vn0123, vminus_ln2_lo); in xnn_f32_velu_ukernel__neon_rr2_p6_x16() 76 vt4567 = vmlaq_f32(vt4567, vn4567, vminus_ln2_lo); in xnn_f32_velu_ukernel__neon_rr2_p6_x16() [all …]
|
/external/XNNPACK/src/f32-gemm/gen-inc/ |
D | 8x8s4inc-minmax-neon.c | 119 vacc0x0123 = vmlaq_f32(vacc0x0123, va0, vb0123c0); in xnn_f32_gemminc_minmax_ukernel_8x8s4__neon() 120 vacc1x0123 = vmlaq_f32(vacc1x0123, va1, vb0123c0); in xnn_f32_gemminc_minmax_ukernel_8x8s4__neon() 121 vacc2x0123 = vmlaq_f32(vacc2x0123, va2, vb0123c0); in xnn_f32_gemminc_minmax_ukernel_8x8s4__neon() 122 vacc3x0123 = vmlaq_f32(vacc3x0123, va3, vb0123c0); in xnn_f32_gemminc_minmax_ukernel_8x8s4__neon() 123 vacc4x0123 = vmlaq_f32(vacc4x0123, va4, vb0123c0); in xnn_f32_gemminc_minmax_ukernel_8x8s4__neon() 124 vacc5x0123 = vmlaq_f32(vacc5x0123, va5, vb0123c0); in xnn_f32_gemminc_minmax_ukernel_8x8s4__neon() 125 vacc6x0123 = vmlaq_f32(vacc6x0123, va6, vb0123c0); in xnn_f32_gemminc_minmax_ukernel_8x8s4__neon() 126 vacc7x0123 = vmlaq_f32(vacc7x0123, va7, vb0123c0); in xnn_f32_gemminc_minmax_ukernel_8x8s4__neon() 127 vacc0x4567 = vmlaq_f32(vacc0x4567, va0, vb4567c0); in xnn_f32_gemminc_minmax_ukernel_8x8s4__neon() 128 vacc1x4567 = vmlaq_f32(vacc1x4567, va1, vb4567c0); in xnn_f32_gemminc_minmax_ukernel_8x8s4__neon() [all …]
|
D | 6x8s4inc-minmax-neon.c | 101 vacc0x0123 = vmlaq_f32(vacc0x0123, va0, vb0123c0); in xnn_f32_gemminc_minmax_ukernel_6x8s4__neon() 102 vacc1x0123 = vmlaq_f32(vacc1x0123, va1, vb0123c0); in xnn_f32_gemminc_minmax_ukernel_6x8s4__neon() 103 vacc2x0123 = vmlaq_f32(vacc2x0123, va2, vb0123c0); in xnn_f32_gemminc_minmax_ukernel_6x8s4__neon() 104 vacc3x0123 = vmlaq_f32(vacc3x0123, va3, vb0123c0); in xnn_f32_gemminc_minmax_ukernel_6x8s4__neon() 105 vacc4x0123 = vmlaq_f32(vacc4x0123, va4, vb0123c0); in xnn_f32_gemminc_minmax_ukernel_6x8s4__neon() 106 vacc5x0123 = vmlaq_f32(vacc5x0123, va5, vb0123c0); in xnn_f32_gemminc_minmax_ukernel_6x8s4__neon() 107 vacc0x4567 = vmlaq_f32(vacc0x4567, va0, vb4567c0); in xnn_f32_gemminc_minmax_ukernel_6x8s4__neon() 108 vacc1x4567 = vmlaq_f32(vacc1x4567, va1, vb4567c0); in xnn_f32_gemminc_minmax_ukernel_6x8s4__neon() 109 vacc2x4567 = vmlaq_f32(vacc2x4567, va2, vb4567c0); in xnn_f32_gemminc_minmax_ukernel_6x8s4__neon() 110 vacc3x4567 = vmlaq_f32(vacc3x4567, va3, vb4567c0); in xnn_f32_gemminc_minmax_ukernel_6x8s4__neon() [all …]
|
D | 6x8inc-minmax-neon-dup-ld128.c | 107 vacc0x0123 = vmlaq_f32(vacc0x0123, va0c0, vb0123c0); in xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128() 108 vacc1x0123 = vmlaq_f32(vacc1x0123, va1c0, vb0123c0); in xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128() 109 vacc2x0123 = vmlaq_f32(vacc2x0123, va2c0, vb0123c0); in xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128() 110 vacc3x0123 = vmlaq_f32(vacc3x0123, va3c0, vb0123c0); in xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128() 111 vacc4x0123 = vmlaq_f32(vacc4x0123, va4c0, vb0123c0); in xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128() 112 vacc5x0123 = vmlaq_f32(vacc5x0123, va5c0, vb0123c0); in xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128() 113 vacc0x4567 = vmlaq_f32(vacc0x4567, va0c0, vb4567c0); in xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128() 114 vacc1x4567 = vmlaq_f32(vacc1x4567, va1c0, vb4567c0); in xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128() 115 vacc2x4567 = vmlaq_f32(vacc2x4567, va2c0, vb4567c0); in xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128() 116 vacc3x4567 = vmlaq_f32(vacc3x4567, va3c0, vb4567c0); in xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128() [all …]
|
/external/XNNPACK/src/f32-igemm/gen/ |
D | 8x8s4-minmax-neon.c | 150 vacc0x0123 = vmlaq_f32(vacc0x0123, va0, vb0123c0); in xnn_f32_igemm_minmax_ukernel_8x8s4__neon() 151 vacc1x0123 = vmlaq_f32(vacc1x0123, va1, vb0123c0); in xnn_f32_igemm_minmax_ukernel_8x8s4__neon() 152 vacc2x0123 = vmlaq_f32(vacc2x0123, va2, vb0123c0); in xnn_f32_igemm_minmax_ukernel_8x8s4__neon() 153 vacc3x0123 = vmlaq_f32(vacc3x0123, va3, vb0123c0); in xnn_f32_igemm_minmax_ukernel_8x8s4__neon() 154 vacc4x0123 = vmlaq_f32(vacc4x0123, va4, vb0123c0); in xnn_f32_igemm_minmax_ukernel_8x8s4__neon() 155 vacc5x0123 = vmlaq_f32(vacc5x0123, va5, vb0123c0); in xnn_f32_igemm_minmax_ukernel_8x8s4__neon() 156 vacc6x0123 = vmlaq_f32(vacc6x0123, va6, vb0123c0); in xnn_f32_igemm_minmax_ukernel_8x8s4__neon() 157 vacc7x0123 = vmlaq_f32(vacc7x0123, va7, vb0123c0); in xnn_f32_igemm_minmax_ukernel_8x8s4__neon() 158 vacc0x4567 = vmlaq_f32(vacc0x4567, va0, vb4567c0); in xnn_f32_igemm_minmax_ukernel_8x8s4__neon() 159 vacc1x4567 = vmlaq_f32(vacc1x4567, va1, vb4567c0); in xnn_f32_igemm_minmax_ukernel_8x8s4__neon() [all …]
|
D | 6x8-minmax-neon-dup-ld128.c | 133 vacc0x0123 = vmlaq_f32(vacc0x0123, va0c0, vb0123c0); in xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128() 134 vacc1x0123 = vmlaq_f32(vacc1x0123, va1c0, vb0123c0); in xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128() 135 vacc2x0123 = vmlaq_f32(vacc2x0123, va2c0, vb0123c0); in xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128() 136 vacc3x0123 = vmlaq_f32(vacc3x0123, va3c0, vb0123c0); in xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128() 137 vacc4x0123 = vmlaq_f32(vacc4x0123, va4c0, vb0123c0); in xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128() 138 vacc5x0123 = vmlaq_f32(vacc5x0123, va5c0, vb0123c0); in xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128() 139 vacc0x4567 = vmlaq_f32(vacc0x4567, va0c0, vb4567c0); in xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128() 140 vacc1x4567 = vmlaq_f32(vacc1x4567, va1c0, vb4567c0); in xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128() 141 vacc2x4567 = vmlaq_f32(vacc2x4567, va2c0, vb4567c0); in xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128() 142 vacc3x4567 = vmlaq_f32(vacc3x4567, va3c0, vb4567c0); in xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128() [all …]
|
D | 6x8s4-minmax-neon.c | 126 vacc0x0123 = vmlaq_f32(vacc0x0123, va0, vb0123c0); in xnn_f32_igemm_minmax_ukernel_6x8s4__neon() 127 vacc1x0123 = vmlaq_f32(vacc1x0123, va1, vb0123c0); in xnn_f32_igemm_minmax_ukernel_6x8s4__neon() 128 vacc2x0123 = vmlaq_f32(vacc2x0123, va2, vb0123c0); in xnn_f32_igemm_minmax_ukernel_6x8s4__neon() 129 vacc3x0123 = vmlaq_f32(vacc3x0123, va3, vb0123c0); in xnn_f32_igemm_minmax_ukernel_6x8s4__neon() 130 vacc4x0123 = vmlaq_f32(vacc4x0123, va4, vb0123c0); in xnn_f32_igemm_minmax_ukernel_6x8s4__neon() 131 vacc5x0123 = vmlaq_f32(vacc5x0123, va5, vb0123c0); in xnn_f32_igemm_minmax_ukernel_6x8s4__neon() 132 vacc0x4567 = vmlaq_f32(vacc0x4567, va0, vb4567c0); in xnn_f32_igemm_minmax_ukernel_6x8s4__neon() 133 vacc1x4567 = vmlaq_f32(vacc1x4567, va1, vb4567c0); in xnn_f32_igemm_minmax_ukernel_6x8s4__neon() 134 vacc2x4567 = vmlaq_f32(vacc2x4567, va2, vb4567c0); in xnn_f32_igemm_minmax_ukernel_6x8s4__neon() 135 vacc3x4567 = vmlaq_f32(vacc3x4567, va3, vb4567c0); in xnn_f32_igemm_minmax_ukernel_6x8s4__neon() [all …]
|
/external/XNNPACK/src/f32-gemm/gen/ |
D | 8x8s4-minmax-neon.c | 117 vacc0x0123 = vmlaq_f32(vacc0x0123, va0, vb0123c0); in xnn_f32_gemm_minmax_ukernel_8x8s4__neon() 118 vacc1x0123 = vmlaq_f32(vacc1x0123, va1, vb0123c0); in xnn_f32_gemm_minmax_ukernel_8x8s4__neon() 119 vacc2x0123 = vmlaq_f32(vacc2x0123, va2, vb0123c0); in xnn_f32_gemm_minmax_ukernel_8x8s4__neon() 120 vacc3x0123 = vmlaq_f32(vacc3x0123, va3, vb0123c0); in xnn_f32_gemm_minmax_ukernel_8x8s4__neon() 121 vacc4x0123 = vmlaq_f32(vacc4x0123, va4, vb0123c0); in xnn_f32_gemm_minmax_ukernel_8x8s4__neon() 122 vacc5x0123 = vmlaq_f32(vacc5x0123, va5, vb0123c0); in xnn_f32_gemm_minmax_ukernel_8x8s4__neon() 123 vacc6x0123 = vmlaq_f32(vacc6x0123, va6, vb0123c0); in xnn_f32_gemm_minmax_ukernel_8x8s4__neon() 124 vacc7x0123 = vmlaq_f32(vacc7x0123, va7, vb0123c0); in xnn_f32_gemm_minmax_ukernel_8x8s4__neon() 125 vacc0x4567 = vmlaq_f32(vacc0x4567, va0, vb4567c0); in xnn_f32_gemm_minmax_ukernel_8x8s4__neon() 126 vacc1x4567 = vmlaq_f32(vacc1x4567, va1, vb4567c0); in xnn_f32_gemm_minmax_ukernel_8x8s4__neon() [all …]
|
D | 6x8s4-minmax-neon.c | 99 vacc0x0123 = vmlaq_f32(vacc0x0123, va0, vb0123c0); in xnn_f32_gemm_minmax_ukernel_6x8s4__neon() 100 vacc1x0123 = vmlaq_f32(vacc1x0123, va1, vb0123c0); in xnn_f32_gemm_minmax_ukernel_6x8s4__neon() 101 vacc2x0123 = vmlaq_f32(vacc2x0123, va2, vb0123c0); in xnn_f32_gemm_minmax_ukernel_6x8s4__neon() 102 vacc3x0123 = vmlaq_f32(vacc3x0123, va3, vb0123c0); in xnn_f32_gemm_minmax_ukernel_6x8s4__neon() 103 vacc4x0123 = vmlaq_f32(vacc4x0123, va4, vb0123c0); in xnn_f32_gemm_minmax_ukernel_6x8s4__neon() 104 vacc5x0123 = vmlaq_f32(vacc5x0123, va5, vb0123c0); in xnn_f32_gemm_minmax_ukernel_6x8s4__neon() 105 vacc0x4567 = vmlaq_f32(vacc0x4567, va0, vb4567c0); in xnn_f32_gemm_minmax_ukernel_6x8s4__neon() 106 vacc1x4567 = vmlaq_f32(vacc1x4567, va1, vb4567c0); in xnn_f32_gemm_minmax_ukernel_6x8s4__neon() 107 vacc2x4567 = vmlaq_f32(vacc2x4567, va2, vb4567c0); in xnn_f32_gemm_minmax_ukernel_6x8s4__neon() 108 vacc3x4567 = vmlaq_f32(vacc3x4567, va3, vb4567c0); in xnn_f32_gemm_minmax_ukernel_6x8s4__neon() [all …]
|
D | 6x8-minmax-neon-dup-ld128.c | 105 vacc0x0123 = vmlaq_f32(vacc0x0123, va0c0, vb0123c0); in xnn_f32_gemm_minmax_ukernel_6x8__neon_dup_ld128() 106 vacc1x0123 = vmlaq_f32(vacc1x0123, va1c0, vb0123c0); in xnn_f32_gemm_minmax_ukernel_6x8__neon_dup_ld128() 107 vacc2x0123 = vmlaq_f32(vacc2x0123, va2c0, vb0123c0); in xnn_f32_gemm_minmax_ukernel_6x8__neon_dup_ld128() 108 vacc3x0123 = vmlaq_f32(vacc3x0123, va3c0, vb0123c0); in xnn_f32_gemm_minmax_ukernel_6x8__neon_dup_ld128() 109 vacc4x0123 = vmlaq_f32(vacc4x0123, va4c0, vb0123c0); in xnn_f32_gemm_minmax_ukernel_6x8__neon_dup_ld128() 110 vacc5x0123 = vmlaq_f32(vacc5x0123, va5c0, vb0123c0); in xnn_f32_gemm_minmax_ukernel_6x8__neon_dup_ld128() 111 vacc0x4567 = vmlaq_f32(vacc0x4567, va0c0, vb4567c0); in xnn_f32_gemm_minmax_ukernel_6x8__neon_dup_ld128() 112 vacc1x4567 = vmlaq_f32(vacc1x4567, va1c0, vb4567c0); in xnn_f32_gemm_minmax_ukernel_6x8__neon_dup_ld128() 113 vacc2x4567 = vmlaq_f32(vacc2x4567, va2c0, vb4567c0); in xnn_f32_gemm_minmax_ukernel_6x8__neon_dup_ld128() 114 vacc3x4567 = vmlaq_f32(vacc3x4567, va3c0, vb4567c0); in xnn_f32_gemm_minmax_ukernel_6x8__neon_dup_ld128() [all …]
|
/external/XNNPACK/src/f32-sigmoid/gen/ |
D | neon-rr2-p5-nr2recps-x24.c | 53 float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vz0123, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x24() 54 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vz4567, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x24() 55 float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vz89AB, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x24() 56 float32x4_t vnCDEF = vmlaq_f32(vmagic_bias, vzCDEF, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x24() 57 float32x4_t vnGHIJ = vmlaq_f32(vmagic_bias, vzGHIJ, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x24() 58 float32x4_t vnKLMN = vmlaq_f32(vmagic_bias, vzKLMN, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x24() 74 float32x4_t vt0123 = vmlaq_f32(vz0123, vn0123, vln2_hi); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x24() 75 float32x4_t vt4567 = vmlaq_f32(vz4567, vn4567, vln2_hi); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x24() 76 float32x4_t vt89AB = vmlaq_f32(vz89AB, vn89AB, vln2_hi); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x24() 77 float32x4_t vtCDEF = vmlaq_f32(vzCDEF, vnCDEF, vln2_hi); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x24() [all …]
|
D | neon-rr2-p5-nr2recps-x20.c | 51 float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vz0123, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x20() 52 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vz4567, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x20() 53 float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vz89AB, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x20() 54 float32x4_t vnCDEF = vmlaq_f32(vmagic_bias, vzCDEF, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x20() 55 float32x4_t vnGHIJ = vmlaq_f32(vmagic_bias, vzGHIJ, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x20() 69 float32x4_t vt0123 = vmlaq_f32(vz0123, vn0123, vln2_hi); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x20() 70 float32x4_t vt4567 = vmlaq_f32(vz4567, vn4567, vln2_hi); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x20() 71 float32x4_t vt89AB = vmlaq_f32(vz89AB, vn89AB, vln2_hi); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x20() 72 float32x4_t vtCDEF = vmlaq_f32(vzCDEF, vnCDEF, vln2_hi); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x20() 73 float32x4_t vtGHIJ = vmlaq_f32(vzGHIJ, vnGHIJ, vln2_hi); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x20() [all …]
|
D | neon-rr2-p5-nr2recps-x16.c | 49 float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vz0123, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16() 50 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vz4567, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16() 51 float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vz89AB, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16() 52 float32x4_t vnCDEF = vmlaq_f32(vmagic_bias, vzCDEF, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16() 64 float32x4_t vt0123 = vmlaq_f32(vz0123, vn0123, vln2_hi); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16() 65 float32x4_t vt4567 = vmlaq_f32(vz4567, vn4567, vln2_hi); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16() 66 float32x4_t vt89AB = vmlaq_f32(vz89AB, vn89AB, vln2_hi); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16() 67 float32x4_t vtCDEF = vmlaq_f32(vzCDEF, vnCDEF, vln2_hi); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16() 69 vt0123 = vmlaq_f32(vt0123, vn0123, vln2_lo); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16() 70 vt4567 = vmlaq_f32(vt4567, vn4567, vln2_lo); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16() [all …]
|
/external/XNNPACK/src/f32-dwconv/gen/ |
D | up8x25-minmax-neon.c | 174 vacc0123p0 = vmlaq_f32(vacc0123p0, vi0x0123, vk0x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neon() 175 vacc4567p0 = vmlaq_f32(vacc4567p0, vi0x4567, vk0x4567); in xnn_f32_dwconv_minmax_ukernel_up8x25__neon() 181 vacc0123p0 = vmlaq_f32(vacc0123p0, vi1x0123, vk1x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neon() 182 vacc4567p0 = vmlaq_f32(vacc4567p0, vi1x4567, vk1x4567); in xnn_f32_dwconv_minmax_ukernel_up8x25__neon() 188 vacc0123p0 = vmlaq_f32(vacc0123p0, vi2x0123, vk2x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neon() 189 vacc4567p0 = vmlaq_f32(vacc4567p0, vi2x4567, vk2x4567); in xnn_f32_dwconv_minmax_ukernel_up8x25__neon() 195 vacc0123p0 = vmlaq_f32(vacc0123p0, vi3x0123, vk3x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neon() 196 vacc4567p0 = vmlaq_f32(vacc4567p0, vi3x4567, vk3x4567); in xnn_f32_dwconv_minmax_ukernel_up8x25__neon() 202 vacc0123p0 = vmlaq_f32(vacc0123p0, vi4x0123, vk4x0123); in xnn_f32_dwconv_minmax_ukernel_up8x25__neon() 203 vacc4567p0 = vmlaq_f32(vacc4567p0, vi4x4567, vk4x4567); in xnn_f32_dwconv_minmax_ukernel_up8x25__neon() [all …]
|