/external/XNNPACK/src/f32-sigmoid/gen/ |
D | neon-frac-p9-p10-nr1recps-x16.c | 70 float32x4_t vnum0123 = vmlaq_f32(valpha_7, vn0123_sq, valpha_9); in xnn_f32_sigmoid_ukernel__neon_frac_p9_p10_nr1recps_x16() 71 float32x4_t vnum4567 = vmlaq_f32(valpha_7, vn4567_sq, valpha_9); in xnn_f32_sigmoid_ukernel__neon_frac_p9_p10_nr1recps_x16() 72 float32x4_t vnum89AB = vmlaq_f32(valpha_7, vn89AB_sq, valpha_9); in xnn_f32_sigmoid_ukernel__neon_frac_p9_p10_nr1recps_x16() 73 float32x4_t vnumCDEF = vmlaq_f32(valpha_7, vnCDEF_sq, valpha_9); in xnn_f32_sigmoid_ukernel__neon_frac_p9_p10_nr1recps_x16() 75 vnum0123 = vmlaq_f32(valpha_5, vn0123_sq, vnum0123); in xnn_f32_sigmoid_ukernel__neon_frac_p9_p10_nr1recps_x16() 76 vnum4567 = vmlaq_f32(valpha_5, vn4567_sq, vnum4567); in xnn_f32_sigmoid_ukernel__neon_frac_p9_p10_nr1recps_x16() 77 vnum89AB = vmlaq_f32(valpha_5, vn89AB_sq, vnum89AB); in xnn_f32_sigmoid_ukernel__neon_frac_p9_p10_nr1recps_x16() 78 vnumCDEF = vmlaq_f32(valpha_5, vnCDEF_sq, vnumCDEF); in xnn_f32_sigmoid_ukernel__neon_frac_p9_p10_nr1recps_x16() 80 vnum0123 = vmlaq_f32(valpha_3, vn0123_sq, vnum0123); in xnn_f32_sigmoid_ukernel__neon_frac_p9_p10_nr1recps_x16() 81 vnum4567 = vmlaq_f32(valpha_3, vn4567_sq, vnum4567); in xnn_f32_sigmoid_ukernel__neon_frac_p9_p10_nr1recps_x16() [all …]
|
D | neon-rr2-p5-nr2recps-x24.c | 70 float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vz0123, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x24() 71 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vz4567, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x24() 72 float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vz89AB, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x24() 73 float32x4_t vnCDEF = vmlaq_f32(vmagic_bias, vzCDEF, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x24() 74 float32x4_t vnGHIJ = vmlaq_f32(vmagic_bias, vzGHIJ, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x24() 75 float32x4_t vnKLMN = vmlaq_f32(vmagic_bias, vzKLMN, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x24() 96 float32x4_t vt0123 = vmlaq_f32(vz0123, vn0123, vln2_hi); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x24() 97 float32x4_t vt4567 = vmlaq_f32(vz4567, vn4567, vln2_hi); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x24() 98 float32x4_t vt89AB = vmlaq_f32(vz89AB, vn89AB, vln2_hi); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x24() 99 float32x4_t vtCDEF = vmlaq_f32(vzCDEF, vnCDEF, vln2_hi); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x24() [all …]
|
D | neon-rr2-p5-nr2recps-x20.c | 68 float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vz0123, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x20() 69 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vz4567, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x20() 70 float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vz89AB, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x20() 71 float32x4_t vnCDEF = vmlaq_f32(vmagic_bias, vzCDEF, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x20() 72 float32x4_t vnGHIJ = vmlaq_f32(vmagic_bias, vzGHIJ, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x20() 91 float32x4_t vt0123 = vmlaq_f32(vz0123, vn0123, vln2_hi); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x20() 92 float32x4_t vt4567 = vmlaq_f32(vz4567, vn4567, vln2_hi); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x20() 93 float32x4_t vt89AB = vmlaq_f32(vz89AB, vn89AB, vln2_hi); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x20() 94 float32x4_t vtCDEF = vmlaq_f32(vzCDEF, vnCDEF, vln2_hi); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x20() 95 float32x4_t vtGHIJ = vmlaq_f32(vzGHIJ, vnGHIJ, vln2_hi); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x20() [all …]
|
D | neon-rr2-p5-nr2recps-x16.c | 66 float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vz0123, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16() 67 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vz4567, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16() 68 float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vz89AB, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16() 69 float32x4_t vnCDEF = vmlaq_f32(vmagic_bias, vzCDEF, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16() 86 float32x4_t vt0123 = vmlaq_f32(vz0123, vn0123, vln2_hi); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16() 87 float32x4_t vt4567 = vmlaq_f32(vz4567, vn4567, vln2_hi); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16() 88 float32x4_t vt89AB = vmlaq_f32(vz89AB, vn89AB, vln2_hi); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16() 89 float32x4_t vtCDEF = vmlaq_f32(vzCDEF, vnCDEF, vln2_hi); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16() 91 vt0123 = vmlaq_f32(vt0123, vn0123, vln2_lo); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16() 92 vt4567 = vmlaq_f32(vt4567, vn4567, vln2_lo); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16() [all …]
|
D | neon-rr2-p5-nr2recps-x12.c | 64 float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vz0123, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12() 65 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vz4567, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12() 66 float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vz89AB, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12() 81 float32x4_t vt0123 = vmlaq_f32(vz0123, vn0123, vln2_hi); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12() 82 float32x4_t vt4567 = vmlaq_f32(vz4567, vn4567, vln2_hi); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12() 83 float32x4_t vt89AB = vmlaq_f32(vz89AB, vn89AB, vln2_hi); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12() 85 vt0123 = vmlaq_f32(vt0123, vn0123, vln2_lo); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12() 86 vt4567 = vmlaq_f32(vt4567, vn4567, vln2_lo); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12() 87 vt89AB = vmlaq_f32(vt89AB, vn89AB, vln2_lo); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12() 90 float32x4_t vp0123 = vmlaq_f32(vc4, vc5, vt0123); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12() [all …]
|
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | neon-p5-x20-acc5.c | 69 float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vx0123, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5() 70 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vx4567, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5() 71 float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5() 72 float32x4_t vnCDEF = vmlaq_f32(vmagic_bias, vxCDEF, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5() 73 float32x4_t vnGHIJ = vmlaq_f32(vmagic_bias, vxGHIJ, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5() 92 float32x4_t vt0123 = vmlaq_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5() 93 float32x4_t vt4567 = vmlaq_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5() 94 float32x4_t vt89AB = vmlaq_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5() 95 float32x4_t vtCDEF = vmlaq_f32(vxCDEF, vnCDEF, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5() 96 float32x4_t vtGHIJ = vmlaq_f32(vxGHIJ, vnGHIJ, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5() [all …]
|
D | neon-p5-x20.c | 65 float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vx0123, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20() 66 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vx4567, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20() 67 float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20() 68 float32x4_t vnCDEF = vmlaq_f32(vmagic_bias, vxCDEF, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20() 69 float32x4_t vnGHIJ = vmlaq_f32(vmagic_bias, vxGHIJ, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20() 88 float32x4_t vt0123 = vmlaq_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20() 89 float32x4_t vt4567 = vmlaq_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20() 90 float32x4_t vt89AB = vmlaq_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20() 91 float32x4_t vtCDEF = vmlaq_f32(vxCDEF, vnCDEF, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20() 92 float32x4_t vtGHIJ = vmlaq_f32(vxGHIJ, vnGHIJ, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20() [all …]
|
D | neon-p5-x20-acc2.c | 66 float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vx0123, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc2() 67 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vx4567, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc2() 68 float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc2() 69 float32x4_t vnCDEF = vmlaq_f32(vmagic_bias, vxCDEF, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc2() 70 float32x4_t vnGHIJ = vmlaq_f32(vmagic_bias, vxGHIJ, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc2() 89 float32x4_t vt0123 = vmlaq_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc2() 90 float32x4_t vt4567 = vmlaq_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc2() 91 float32x4_t vt89AB = vmlaq_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc2() 92 float32x4_t vtCDEF = vmlaq_f32(vxCDEF, vnCDEF, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc2() 93 float32x4_t vtGHIJ = vmlaq_f32(vxGHIJ, vnGHIJ, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc2() [all …]
|
D | neon-p5-x16-acc2.c | 64 float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vx0123, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc2() 65 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vx4567, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc2() 66 float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc2() 67 float32x4_t vnCDEF = vmlaq_f32(vmagic_bias, vxCDEF, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc2() 84 float32x4_t vt0123 = vmlaq_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc2() 85 float32x4_t vt4567 = vmlaq_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc2() 86 float32x4_t vt89AB = vmlaq_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc2() 87 float32x4_t vtCDEF = vmlaq_f32(vxCDEF, vnCDEF, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc2() 89 vt0123 = vmlaq_f32(vt0123, vn0123, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc2() 90 vt4567 = vmlaq_f32(vt4567, vn4567, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc2() [all …]
|
D | neon-p5-x16.c | 63 float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vx0123, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16() 64 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vx4567, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16() 65 float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16() 66 float32x4_t vnCDEF = vmlaq_f32(vmagic_bias, vxCDEF, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16() 83 float32x4_t vt0123 = vmlaq_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16() 84 float32x4_t vt4567 = vmlaq_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16() 85 float32x4_t vt89AB = vmlaq_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16() 86 float32x4_t vtCDEF = vmlaq_f32(vxCDEF, vnCDEF, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16() 88 vt0123 = vmlaq_f32(vt0123, vn0123, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16() 89 vt4567 = vmlaq_f32(vt4567, vn4567, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16() [all …]
|
D | neon-p5-x16-acc4.c | 66 float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vx0123, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc4() 67 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vx4567, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc4() 68 float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc4() 69 float32x4_t vnCDEF = vmlaq_f32(vmagic_bias, vxCDEF, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc4() 86 float32x4_t vt0123 = vmlaq_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc4() 87 float32x4_t vt4567 = vmlaq_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc4() 88 float32x4_t vt89AB = vmlaq_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc4() 89 float32x4_t vtCDEF = vmlaq_f32(vxCDEF, vnCDEF, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc4() 91 vt0123 = vmlaq_f32(vt0123, vn0123, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc4() 92 vt4567 = vmlaq_f32(vt4567, vn4567, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc4() [all …]
|
D | neon-p5-x12.c | 61 float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vx0123, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12() 62 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vx4567, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12() 63 float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12() 78 float32x4_t vt0123 = vmlaq_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12() 79 float32x4_t vt4567 = vmlaq_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12() 80 float32x4_t vt89AB = vmlaq_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12() 82 vt0123 = vmlaq_f32(vt0123, vn0123, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12() 83 vt4567 = vmlaq_f32(vt4567, vn4567, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12() 84 vt89AB = vmlaq_f32(vt89AB, vn89AB, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12() 87 float32x4_t vp0123 = vmlaq_f32(vc4, vc5, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12() [all …]
|
D | neon-p5-x12-acc2.c | 62 float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vx0123, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2() 63 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vx4567, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2() 64 float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2() 79 float32x4_t vt0123 = vmlaq_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2() 80 float32x4_t vt4567 = vmlaq_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2() 81 float32x4_t vt89AB = vmlaq_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2() 83 vt0123 = vmlaq_f32(vt0123, vn0123, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2() 84 vt4567 = vmlaq_f32(vt4567, vn4567, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2() 85 vt89AB = vmlaq_f32(vt89AB, vn89AB, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2() 88 float32x4_t vp0123 = vmlaq_f32(vc4, vc5, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2() [all …]
|
D | neon-p5-x12-acc3.c | 63 float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vx0123, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc3() 64 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vx4567, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc3() 65 float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc3() 80 float32x4_t vt0123 = vmlaq_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc3() 81 float32x4_t vt4567 = vmlaq_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc3() 82 float32x4_t vt89AB = vmlaq_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc3() 84 vt0123 = vmlaq_f32(vt0123, vn0123, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc3() 85 vt4567 = vmlaq_f32(vt4567, vn4567, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc3() 86 vt89AB = vmlaq_f32(vt89AB, vn89AB, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc3() 89 float32x4_t vp0123 = vmlaq_f32(vc4, vc5, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc3() [all …]
|
D | neon-p5-x8-acc2.c | 60 float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vx0123, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2() 61 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vx4567, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2() 74 float32x4_t vt0123 = vmlaq_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2() 75 float32x4_t vt4567 = vmlaq_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2() 77 vt0123 = vmlaq_f32(vt0123, vn0123, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2() 78 vt4567 = vmlaq_f32(vt4567, vn4567, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2() 81 float32x4_t vp0123 = vmlaq_f32(vc4, vc5, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2() 82 float32x4_t vp4567 = vmlaq_f32(vc4, vc5, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2() 84 vp0123 = vmlaq_f32(vc3, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2() 85 vp4567 = vmlaq_f32(vc3, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2() [all …]
|
D | neon-p5-x8.c | 59 float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vx0123, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8() 60 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vx4567, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8() 73 float32x4_t vt0123 = vmlaq_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8() 74 float32x4_t vt4567 = vmlaq_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8() 76 vt0123 = vmlaq_f32(vt0123, vn0123, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8() 77 vt4567 = vmlaq_f32(vt4567, vn4567, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8() 80 float32x4_t vp0123 = vmlaq_f32(vc4, vc5, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8() 81 float32x4_t vp4567 = vmlaq_f32(vc4, vc5, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8() 83 vp0123 = vmlaq_f32(vc3, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8() 84 vp4567 = vmlaq_f32(vc3, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8() [all …]
|
/external/XNNPACK/src/f32-gemm/gen-inc/ |
D | 8x8s4-neon.c | 119 vacc0x0123 = vmlaq_f32(vacc0x0123, va0, vb0123c0); in xnn_f32_gemminc_ukernel_8x8s4__neon() 120 vacc1x0123 = vmlaq_f32(vacc1x0123, va1, vb0123c0); in xnn_f32_gemminc_ukernel_8x8s4__neon() 121 vacc2x0123 = vmlaq_f32(vacc2x0123, va2, vb0123c0); in xnn_f32_gemminc_ukernel_8x8s4__neon() 122 vacc3x0123 = vmlaq_f32(vacc3x0123, va3, vb0123c0); in xnn_f32_gemminc_ukernel_8x8s4__neon() 123 vacc4x0123 = vmlaq_f32(vacc4x0123, va4, vb0123c0); in xnn_f32_gemminc_ukernel_8x8s4__neon() 124 vacc5x0123 = vmlaq_f32(vacc5x0123, va5, vb0123c0); in xnn_f32_gemminc_ukernel_8x8s4__neon() 125 vacc6x0123 = vmlaq_f32(vacc6x0123, va6, vb0123c0); in xnn_f32_gemminc_ukernel_8x8s4__neon() 126 vacc7x0123 = vmlaq_f32(vacc7x0123, va7, vb0123c0); in xnn_f32_gemminc_ukernel_8x8s4__neon() 127 vacc0x4567 = vmlaq_f32(vacc0x4567, va0, vb4567c0); in xnn_f32_gemminc_ukernel_8x8s4__neon() 128 vacc1x4567 = vmlaq_f32(vacc1x4567, va1, vb4567c0); in xnn_f32_gemminc_ukernel_8x8s4__neon() [all …]
|
D | 6x8s4-neon.c | 101 vacc0x0123 = vmlaq_f32(vacc0x0123, va0, vb0123c0); in xnn_f32_gemminc_ukernel_6x8s4__neon() 102 vacc1x0123 = vmlaq_f32(vacc1x0123, va1, vb0123c0); in xnn_f32_gemminc_ukernel_6x8s4__neon() 103 vacc2x0123 = vmlaq_f32(vacc2x0123, va2, vb0123c0); in xnn_f32_gemminc_ukernel_6x8s4__neon() 104 vacc3x0123 = vmlaq_f32(vacc3x0123, va3, vb0123c0); in xnn_f32_gemminc_ukernel_6x8s4__neon() 105 vacc4x0123 = vmlaq_f32(vacc4x0123, va4, vb0123c0); in xnn_f32_gemminc_ukernel_6x8s4__neon() 106 vacc5x0123 = vmlaq_f32(vacc5x0123, va5, vb0123c0); in xnn_f32_gemminc_ukernel_6x8s4__neon() 107 vacc0x4567 = vmlaq_f32(vacc0x4567, va0, vb4567c0); in xnn_f32_gemminc_ukernel_6x8s4__neon() 108 vacc1x4567 = vmlaq_f32(vacc1x4567, va1, vb4567c0); in xnn_f32_gemminc_ukernel_6x8s4__neon() 109 vacc2x4567 = vmlaq_f32(vacc2x4567, va2, vb4567c0); in xnn_f32_gemminc_ukernel_6x8s4__neon() 110 vacc3x4567 = vmlaq_f32(vacc3x4567, va3, vb4567c0); in xnn_f32_gemminc_ukernel_6x8s4__neon() [all …]
|
D | 6x8-neon-dup-ld128.c | 107 vacc0x0123 = vmlaq_f32(vacc0x0123, va0c0, vb0123c0); in xnn_f32_gemminc_ukernel_6x8__neon_dup_ld128() 108 vacc1x0123 = vmlaq_f32(vacc1x0123, va1c0, vb0123c0); in xnn_f32_gemminc_ukernel_6x8__neon_dup_ld128() 109 vacc2x0123 = vmlaq_f32(vacc2x0123, va2c0, vb0123c0); in xnn_f32_gemminc_ukernel_6x8__neon_dup_ld128() 110 vacc3x0123 = vmlaq_f32(vacc3x0123, va3c0, vb0123c0); in xnn_f32_gemminc_ukernel_6x8__neon_dup_ld128() 111 vacc4x0123 = vmlaq_f32(vacc4x0123, va4c0, vb0123c0); in xnn_f32_gemminc_ukernel_6x8__neon_dup_ld128() 112 vacc5x0123 = vmlaq_f32(vacc5x0123, va5c0, vb0123c0); in xnn_f32_gemminc_ukernel_6x8__neon_dup_ld128() 113 vacc0x4567 = vmlaq_f32(vacc0x4567, va0c0, vb4567c0); in xnn_f32_gemminc_ukernel_6x8__neon_dup_ld128() 114 vacc1x4567 = vmlaq_f32(vacc1x4567, va1c0, vb4567c0); in xnn_f32_gemminc_ukernel_6x8__neon_dup_ld128() 115 vacc2x4567 = vmlaq_f32(vacc2x4567, va2c0, vb4567c0); in xnn_f32_gemminc_ukernel_6x8__neon_dup_ld128() 116 vacc3x4567 = vmlaq_f32(vacc3x4567, va3c0, vb4567c0); in xnn_f32_gemminc_ukernel_6x8__neon_dup_ld128() [all …]
|
/external/XNNPACK/src/f32-igemm/gen/ |
D | 8x8s4-neon.c | 150 vacc0x0123 = vmlaq_f32(vacc0x0123, va0, vb0123c0); in xnn_f32_igemm_ukernel_8x8s4__neon() 151 vacc1x0123 = vmlaq_f32(vacc1x0123, va1, vb0123c0); in xnn_f32_igemm_ukernel_8x8s4__neon() 152 vacc2x0123 = vmlaq_f32(vacc2x0123, va2, vb0123c0); in xnn_f32_igemm_ukernel_8x8s4__neon() 153 vacc3x0123 = vmlaq_f32(vacc3x0123, va3, vb0123c0); in xnn_f32_igemm_ukernel_8x8s4__neon() 154 vacc4x0123 = vmlaq_f32(vacc4x0123, va4, vb0123c0); in xnn_f32_igemm_ukernel_8x8s4__neon() 155 vacc5x0123 = vmlaq_f32(vacc5x0123, va5, vb0123c0); in xnn_f32_igemm_ukernel_8x8s4__neon() 156 vacc6x0123 = vmlaq_f32(vacc6x0123, va6, vb0123c0); in xnn_f32_igemm_ukernel_8x8s4__neon() 157 vacc7x0123 = vmlaq_f32(vacc7x0123, va7, vb0123c0); in xnn_f32_igemm_ukernel_8x8s4__neon() 158 vacc0x4567 = vmlaq_f32(vacc0x4567, va0, vb4567c0); in xnn_f32_igemm_ukernel_8x8s4__neon() 159 vacc1x4567 = vmlaq_f32(vacc1x4567, va1, vb4567c0); in xnn_f32_igemm_ukernel_8x8s4__neon() [all …]
|
D | 6x8-neon-dup-ld128.c | 133 vacc0x0123 = vmlaq_f32(vacc0x0123, va0c0, vb0123c0); in xnn_f32_igemm_ukernel_6x8__neon_dup_ld128() 134 vacc1x0123 = vmlaq_f32(vacc1x0123, va1c0, vb0123c0); in xnn_f32_igemm_ukernel_6x8__neon_dup_ld128() 135 vacc2x0123 = vmlaq_f32(vacc2x0123, va2c0, vb0123c0); in xnn_f32_igemm_ukernel_6x8__neon_dup_ld128() 136 vacc3x0123 = vmlaq_f32(vacc3x0123, va3c0, vb0123c0); in xnn_f32_igemm_ukernel_6x8__neon_dup_ld128() 137 vacc4x0123 = vmlaq_f32(vacc4x0123, va4c0, vb0123c0); in xnn_f32_igemm_ukernel_6x8__neon_dup_ld128() 138 vacc5x0123 = vmlaq_f32(vacc5x0123, va5c0, vb0123c0); in xnn_f32_igemm_ukernel_6x8__neon_dup_ld128() 139 vacc0x4567 = vmlaq_f32(vacc0x4567, va0c0, vb4567c0); in xnn_f32_igemm_ukernel_6x8__neon_dup_ld128() 140 vacc1x4567 = vmlaq_f32(vacc1x4567, va1c0, vb4567c0); in xnn_f32_igemm_ukernel_6x8__neon_dup_ld128() 141 vacc2x4567 = vmlaq_f32(vacc2x4567, va2c0, vb4567c0); in xnn_f32_igemm_ukernel_6x8__neon_dup_ld128() 142 vacc3x4567 = vmlaq_f32(vacc3x4567, va3c0, vb4567c0); in xnn_f32_igemm_ukernel_6x8__neon_dup_ld128() [all …]
|
D | 6x8s4-neon.c | 126 vacc0x0123 = vmlaq_f32(vacc0x0123, va0, vb0123c0); in xnn_f32_igemm_ukernel_6x8s4__neon() 127 vacc1x0123 = vmlaq_f32(vacc1x0123, va1, vb0123c0); in xnn_f32_igemm_ukernel_6x8s4__neon() 128 vacc2x0123 = vmlaq_f32(vacc2x0123, va2, vb0123c0); in xnn_f32_igemm_ukernel_6x8s4__neon() 129 vacc3x0123 = vmlaq_f32(vacc3x0123, va3, vb0123c0); in xnn_f32_igemm_ukernel_6x8s4__neon() 130 vacc4x0123 = vmlaq_f32(vacc4x0123, va4, vb0123c0); in xnn_f32_igemm_ukernel_6x8s4__neon() 131 vacc5x0123 = vmlaq_f32(vacc5x0123, va5, vb0123c0); in xnn_f32_igemm_ukernel_6x8s4__neon() 132 vacc0x4567 = vmlaq_f32(vacc0x4567, va0, vb4567c0); in xnn_f32_igemm_ukernel_6x8s4__neon() 133 vacc1x4567 = vmlaq_f32(vacc1x4567, va1, vb4567c0); in xnn_f32_igemm_ukernel_6x8s4__neon() 134 vacc2x4567 = vmlaq_f32(vacc2x4567, va2, vb4567c0); in xnn_f32_igemm_ukernel_6x8s4__neon() 135 vacc3x4567 = vmlaq_f32(vacc3x4567, va3, vb4567c0); in xnn_f32_igemm_ukernel_6x8s4__neon() [all …]
|
/external/XNNPACK/src/f32-gemm/gen/ |
D | 8x8s4-neon.c | 117 vacc0x0123 = vmlaq_f32(vacc0x0123, va0, vb0123c0); in xnn_f32_gemm_ukernel_8x8s4__neon() 118 vacc1x0123 = vmlaq_f32(vacc1x0123, va1, vb0123c0); in xnn_f32_gemm_ukernel_8x8s4__neon() 119 vacc2x0123 = vmlaq_f32(vacc2x0123, va2, vb0123c0); in xnn_f32_gemm_ukernel_8x8s4__neon() 120 vacc3x0123 = vmlaq_f32(vacc3x0123, va3, vb0123c0); in xnn_f32_gemm_ukernel_8x8s4__neon() 121 vacc4x0123 = vmlaq_f32(vacc4x0123, va4, vb0123c0); in xnn_f32_gemm_ukernel_8x8s4__neon() 122 vacc5x0123 = vmlaq_f32(vacc5x0123, va5, vb0123c0); in xnn_f32_gemm_ukernel_8x8s4__neon() 123 vacc6x0123 = vmlaq_f32(vacc6x0123, va6, vb0123c0); in xnn_f32_gemm_ukernel_8x8s4__neon() 124 vacc7x0123 = vmlaq_f32(vacc7x0123, va7, vb0123c0); in xnn_f32_gemm_ukernel_8x8s4__neon() 125 vacc0x4567 = vmlaq_f32(vacc0x4567, va0, vb4567c0); in xnn_f32_gemm_ukernel_8x8s4__neon() 126 vacc1x4567 = vmlaq_f32(vacc1x4567, va1, vb4567c0); in xnn_f32_gemm_ukernel_8x8s4__neon() [all …]
|
D | 6x8s4-neon.c | 99 vacc0x0123 = vmlaq_f32(vacc0x0123, va0, vb0123c0); in xnn_f32_gemm_ukernel_6x8s4__neon() 100 vacc1x0123 = vmlaq_f32(vacc1x0123, va1, vb0123c0); in xnn_f32_gemm_ukernel_6x8s4__neon() 101 vacc2x0123 = vmlaq_f32(vacc2x0123, va2, vb0123c0); in xnn_f32_gemm_ukernel_6x8s4__neon() 102 vacc3x0123 = vmlaq_f32(vacc3x0123, va3, vb0123c0); in xnn_f32_gemm_ukernel_6x8s4__neon() 103 vacc4x0123 = vmlaq_f32(vacc4x0123, va4, vb0123c0); in xnn_f32_gemm_ukernel_6x8s4__neon() 104 vacc5x0123 = vmlaq_f32(vacc5x0123, va5, vb0123c0); in xnn_f32_gemm_ukernel_6x8s4__neon() 105 vacc0x4567 = vmlaq_f32(vacc0x4567, va0, vb4567c0); in xnn_f32_gemm_ukernel_6x8s4__neon() 106 vacc1x4567 = vmlaq_f32(vacc1x4567, va1, vb4567c0); in xnn_f32_gemm_ukernel_6x8s4__neon() 107 vacc2x4567 = vmlaq_f32(vacc2x4567, va2, vb4567c0); in xnn_f32_gemm_ukernel_6x8s4__neon() 108 vacc3x4567 = vmlaq_f32(vacc3x4567, va3, vb4567c0); in xnn_f32_gemm_ukernel_6x8s4__neon() [all …]
|
D | 6x8-neon-dup-ld128.c | 105 vacc0x0123 = vmlaq_f32(vacc0x0123, va0c0, vb0123c0); in xnn_f32_gemm_ukernel_6x8__neon_dup_ld128() 106 vacc1x0123 = vmlaq_f32(vacc1x0123, va1c0, vb0123c0); in xnn_f32_gemm_ukernel_6x8__neon_dup_ld128() 107 vacc2x0123 = vmlaq_f32(vacc2x0123, va2c0, vb0123c0); in xnn_f32_gemm_ukernel_6x8__neon_dup_ld128() 108 vacc3x0123 = vmlaq_f32(vacc3x0123, va3c0, vb0123c0); in xnn_f32_gemm_ukernel_6x8__neon_dup_ld128() 109 vacc4x0123 = vmlaq_f32(vacc4x0123, va4c0, vb0123c0); in xnn_f32_gemm_ukernel_6x8__neon_dup_ld128() 110 vacc5x0123 = vmlaq_f32(vacc5x0123, va5c0, vb0123c0); in xnn_f32_gemm_ukernel_6x8__neon_dup_ld128() 111 vacc0x4567 = vmlaq_f32(vacc0x4567, va0c0, vb4567c0); in xnn_f32_gemm_ukernel_6x8__neon_dup_ld128() 112 vacc1x4567 = vmlaq_f32(vacc1x4567, va1c0, vb4567c0); in xnn_f32_gemm_ukernel_6x8__neon_dup_ld128() 113 vacc2x4567 = vmlaq_f32(vacc2x4567, va2c0, vb4567c0); in xnn_f32_gemm_ukernel_6x8__neon_dup_ld128() 114 vacc3x4567 = vmlaq_f32(vacc3x4567, va3c0, vb4567c0); in xnn_f32_gemm_ukernel_6x8__neon_dup_ld128() [all …]
|