Home
last modified time | relevance | path

Searched refs:vmlaq_f32 (Results 1 – 25 of 136) sorted by relevance

123456

/external/XNNPACK/src/f32-sigmoid/gen/
Dneon-frac-p9-p10-nr1recps-x16.c70 float32x4_t vnum0123 = vmlaq_f32(valpha_7, vn0123_sq, valpha_9); in xnn_f32_sigmoid_ukernel__neon_frac_p9_p10_nr1recps_x16()
71 float32x4_t vnum4567 = vmlaq_f32(valpha_7, vn4567_sq, valpha_9); in xnn_f32_sigmoid_ukernel__neon_frac_p9_p10_nr1recps_x16()
72 float32x4_t vnum89AB = vmlaq_f32(valpha_7, vn89AB_sq, valpha_9); in xnn_f32_sigmoid_ukernel__neon_frac_p9_p10_nr1recps_x16()
73 float32x4_t vnumCDEF = vmlaq_f32(valpha_7, vnCDEF_sq, valpha_9); in xnn_f32_sigmoid_ukernel__neon_frac_p9_p10_nr1recps_x16()
75 vnum0123 = vmlaq_f32(valpha_5, vn0123_sq, vnum0123); in xnn_f32_sigmoid_ukernel__neon_frac_p9_p10_nr1recps_x16()
76 vnum4567 = vmlaq_f32(valpha_5, vn4567_sq, vnum4567); in xnn_f32_sigmoid_ukernel__neon_frac_p9_p10_nr1recps_x16()
77 vnum89AB = vmlaq_f32(valpha_5, vn89AB_sq, vnum89AB); in xnn_f32_sigmoid_ukernel__neon_frac_p9_p10_nr1recps_x16()
78 vnumCDEF = vmlaq_f32(valpha_5, vnCDEF_sq, vnumCDEF); in xnn_f32_sigmoid_ukernel__neon_frac_p9_p10_nr1recps_x16()
80 vnum0123 = vmlaq_f32(valpha_3, vn0123_sq, vnum0123); in xnn_f32_sigmoid_ukernel__neon_frac_p9_p10_nr1recps_x16()
81 vnum4567 = vmlaq_f32(valpha_3, vn4567_sq, vnum4567); in xnn_f32_sigmoid_ukernel__neon_frac_p9_p10_nr1recps_x16()
[all …]
Dneon-rr2-p5-nr2recps-x24.c70 float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vz0123, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x24()
71 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vz4567, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x24()
72 float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vz89AB, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x24()
73 float32x4_t vnCDEF = vmlaq_f32(vmagic_bias, vzCDEF, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x24()
74 float32x4_t vnGHIJ = vmlaq_f32(vmagic_bias, vzGHIJ, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x24()
75 float32x4_t vnKLMN = vmlaq_f32(vmagic_bias, vzKLMN, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x24()
96 float32x4_t vt0123 = vmlaq_f32(vz0123, vn0123, vln2_hi); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x24()
97 float32x4_t vt4567 = vmlaq_f32(vz4567, vn4567, vln2_hi); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x24()
98 float32x4_t vt89AB = vmlaq_f32(vz89AB, vn89AB, vln2_hi); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x24()
99 float32x4_t vtCDEF = vmlaq_f32(vzCDEF, vnCDEF, vln2_hi); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x24()
[all …]
Dneon-rr2-p5-nr2recps-x20.c68 float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vz0123, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x20()
69 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vz4567, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x20()
70 float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vz89AB, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x20()
71 float32x4_t vnCDEF = vmlaq_f32(vmagic_bias, vzCDEF, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x20()
72 float32x4_t vnGHIJ = vmlaq_f32(vmagic_bias, vzGHIJ, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x20()
91 float32x4_t vt0123 = vmlaq_f32(vz0123, vn0123, vln2_hi); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x20()
92 float32x4_t vt4567 = vmlaq_f32(vz4567, vn4567, vln2_hi); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x20()
93 float32x4_t vt89AB = vmlaq_f32(vz89AB, vn89AB, vln2_hi); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x20()
94 float32x4_t vtCDEF = vmlaq_f32(vzCDEF, vnCDEF, vln2_hi); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x20()
95 float32x4_t vtGHIJ = vmlaq_f32(vzGHIJ, vnGHIJ, vln2_hi); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x20()
[all …]
Dneon-rr2-p5-nr2recps-x16.c66 float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vz0123, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16()
67 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vz4567, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16()
68 float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vz89AB, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16()
69 float32x4_t vnCDEF = vmlaq_f32(vmagic_bias, vzCDEF, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16()
86 float32x4_t vt0123 = vmlaq_f32(vz0123, vn0123, vln2_hi); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16()
87 float32x4_t vt4567 = vmlaq_f32(vz4567, vn4567, vln2_hi); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16()
88 float32x4_t vt89AB = vmlaq_f32(vz89AB, vn89AB, vln2_hi); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16()
89 float32x4_t vtCDEF = vmlaq_f32(vzCDEF, vnCDEF, vln2_hi); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16()
91 vt0123 = vmlaq_f32(vt0123, vn0123, vln2_lo); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16()
92 vt4567 = vmlaq_f32(vt4567, vn4567, vln2_lo); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16()
[all …]
Dneon-rr2-p5-nr2recps-x12.c64 float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vz0123, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12()
65 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vz4567, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12()
66 float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vz89AB, vminus_log2e); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12()
81 float32x4_t vt0123 = vmlaq_f32(vz0123, vn0123, vln2_hi); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12()
82 float32x4_t vt4567 = vmlaq_f32(vz4567, vn4567, vln2_hi); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12()
83 float32x4_t vt89AB = vmlaq_f32(vz89AB, vn89AB, vln2_hi); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12()
85 vt0123 = vmlaq_f32(vt0123, vn0123, vln2_lo); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12()
86 vt4567 = vmlaq_f32(vt4567, vn4567, vln2_lo); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12()
87 vt89AB = vmlaq_f32(vt89AB, vn89AB, vln2_lo); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12()
90 float32x4_t vp0123 = vmlaq_f32(vc4, vc5, vt0123); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12()
[all …]
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/
Dneon-p5-x20-acc5.c69 float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vx0123, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5()
70 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vx4567, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5()
71 float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5()
72 float32x4_t vnCDEF = vmlaq_f32(vmagic_bias, vxCDEF, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5()
73 float32x4_t vnGHIJ = vmlaq_f32(vmagic_bias, vxGHIJ, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5()
92 float32x4_t vt0123 = vmlaq_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5()
93 float32x4_t vt4567 = vmlaq_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5()
94 float32x4_t vt89AB = vmlaq_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5()
95 float32x4_t vtCDEF = vmlaq_f32(vxCDEF, vnCDEF, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5()
96 float32x4_t vtGHIJ = vmlaq_f32(vxGHIJ, vnGHIJ, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5()
[all …]
Dneon-p5-x20.c65 float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vx0123, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20()
66 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vx4567, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20()
67 float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20()
68 float32x4_t vnCDEF = vmlaq_f32(vmagic_bias, vxCDEF, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20()
69 float32x4_t vnGHIJ = vmlaq_f32(vmagic_bias, vxGHIJ, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20()
88 float32x4_t vt0123 = vmlaq_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20()
89 float32x4_t vt4567 = vmlaq_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20()
90 float32x4_t vt89AB = vmlaq_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20()
91 float32x4_t vtCDEF = vmlaq_f32(vxCDEF, vnCDEF, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20()
92 float32x4_t vtGHIJ = vmlaq_f32(vxGHIJ, vnGHIJ, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20()
[all …]
Dneon-p5-x20-acc2.c66 float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vx0123, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc2()
67 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vx4567, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc2()
68 float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc2()
69 float32x4_t vnCDEF = vmlaq_f32(vmagic_bias, vxCDEF, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc2()
70 float32x4_t vnGHIJ = vmlaq_f32(vmagic_bias, vxGHIJ, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc2()
89 float32x4_t vt0123 = vmlaq_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc2()
90 float32x4_t vt4567 = vmlaq_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc2()
91 float32x4_t vt89AB = vmlaq_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc2()
92 float32x4_t vtCDEF = vmlaq_f32(vxCDEF, vnCDEF, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc2()
93 float32x4_t vtGHIJ = vmlaq_f32(vxGHIJ, vnGHIJ, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc2()
[all …]
Dneon-p5-x16-acc2.c64 float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vx0123, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc2()
65 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vx4567, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc2()
66 float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc2()
67 float32x4_t vnCDEF = vmlaq_f32(vmagic_bias, vxCDEF, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc2()
84 float32x4_t vt0123 = vmlaq_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc2()
85 float32x4_t vt4567 = vmlaq_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc2()
86 float32x4_t vt89AB = vmlaq_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc2()
87 float32x4_t vtCDEF = vmlaq_f32(vxCDEF, vnCDEF, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc2()
89 vt0123 = vmlaq_f32(vt0123, vn0123, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc2()
90 vt4567 = vmlaq_f32(vt4567, vn4567, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc2()
[all …]
Dneon-p5-x16.c63 float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vx0123, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16()
64 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vx4567, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16()
65 float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16()
66 float32x4_t vnCDEF = vmlaq_f32(vmagic_bias, vxCDEF, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16()
83 float32x4_t vt0123 = vmlaq_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16()
84 float32x4_t vt4567 = vmlaq_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16()
85 float32x4_t vt89AB = vmlaq_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16()
86 float32x4_t vtCDEF = vmlaq_f32(vxCDEF, vnCDEF, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16()
88 vt0123 = vmlaq_f32(vt0123, vn0123, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16()
89 vt4567 = vmlaq_f32(vt4567, vn4567, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16()
[all …]
Dneon-p5-x16-acc4.c66 float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vx0123, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc4()
67 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vx4567, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc4()
68 float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc4()
69 float32x4_t vnCDEF = vmlaq_f32(vmagic_bias, vxCDEF, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc4()
86 float32x4_t vt0123 = vmlaq_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc4()
87 float32x4_t vt4567 = vmlaq_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc4()
88 float32x4_t vt89AB = vmlaq_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc4()
89 float32x4_t vtCDEF = vmlaq_f32(vxCDEF, vnCDEF, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc4()
91 vt0123 = vmlaq_f32(vt0123, vn0123, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc4()
92 vt4567 = vmlaq_f32(vt4567, vn4567, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc4()
[all …]
Dneon-p5-x12.c61 float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vx0123, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12()
62 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vx4567, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12()
63 float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12()
78 float32x4_t vt0123 = vmlaq_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12()
79 float32x4_t vt4567 = vmlaq_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12()
80 float32x4_t vt89AB = vmlaq_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12()
82 vt0123 = vmlaq_f32(vt0123, vn0123, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12()
83 vt4567 = vmlaq_f32(vt4567, vn4567, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12()
84 vt89AB = vmlaq_f32(vt89AB, vn89AB, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12()
87 float32x4_t vp0123 = vmlaq_f32(vc4, vc5, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12()
[all …]
Dneon-p5-x12-acc2.c62 float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vx0123, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2()
63 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vx4567, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2()
64 float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2()
79 float32x4_t vt0123 = vmlaq_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2()
80 float32x4_t vt4567 = vmlaq_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2()
81 float32x4_t vt89AB = vmlaq_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2()
83 vt0123 = vmlaq_f32(vt0123, vn0123, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2()
84 vt4567 = vmlaq_f32(vt4567, vn4567, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2()
85 vt89AB = vmlaq_f32(vt89AB, vn89AB, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2()
88 float32x4_t vp0123 = vmlaq_f32(vc4, vc5, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2()
[all …]
Dneon-p5-x12-acc3.c63 float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vx0123, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc3()
64 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vx4567, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc3()
65 float32x4_t vn89AB = vmlaq_f32(vmagic_bias, vx89AB, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc3()
80 float32x4_t vt0123 = vmlaq_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc3()
81 float32x4_t vt4567 = vmlaq_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc3()
82 float32x4_t vt89AB = vmlaq_f32(vx89AB, vn89AB, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc3()
84 vt0123 = vmlaq_f32(vt0123, vn0123, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc3()
85 vt4567 = vmlaq_f32(vt4567, vn4567, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc3()
86 vt89AB = vmlaq_f32(vt89AB, vn89AB, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc3()
89 float32x4_t vp0123 = vmlaq_f32(vc4, vc5, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc3()
[all …]
Dneon-p5-x8-acc2.c60 float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vx0123, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2()
61 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vx4567, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2()
74 float32x4_t vt0123 = vmlaq_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2()
75 float32x4_t vt4567 = vmlaq_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2()
77 vt0123 = vmlaq_f32(vt0123, vn0123, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2()
78 vt4567 = vmlaq_f32(vt4567, vn4567, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2()
81 float32x4_t vp0123 = vmlaq_f32(vc4, vc5, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2()
82 float32x4_t vp4567 = vmlaq_f32(vc4, vc5, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2()
84 vp0123 = vmlaq_f32(vc3, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2()
85 vp4567 = vmlaq_f32(vc3, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2()
[all …]
Dneon-p5-x8.c59 float32x4_t vn0123 = vmlaq_f32(vmagic_bias, vx0123, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8()
60 float32x4_t vn4567 = vmlaq_f32(vmagic_bias, vx4567, vlog2e); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8()
73 float32x4_t vt0123 = vmlaq_f32(vx0123, vn0123, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8()
74 float32x4_t vt4567 = vmlaq_f32(vx4567, vn4567, vminus_ln2_hi); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8()
76 vt0123 = vmlaq_f32(vt0123, vn0123, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8()
77 vt4567 = vmlaq_f32(vt4567, vn4567, vminus_ln2_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8()
80 float32x4_t vp0123 = vmlaq_f32(vc4, vc5, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8()
81 float32x4_t vp4567 = vmlaq_f32(vc4, vc5, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8()
83 vp0123 = vmlaq_f32(vc3, vp0123, vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8()
84 vp4567 = vmlaq_f32(vc3, vp4567, vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8()
[all …]
/external/XNNPACK/src/f32-gemm/gen-inc/
D8x8s4-neon.c119 vacc0x0123 = vmlaq_f32(vacc0x0123, va0, vb0123c0); in xnn_f32_gemminc_ukernel_8x8s4__neon()
120 vacc1x0123 = vmlaq_f32(vacc1x0123, va1, vb0123c0); in xnn_f32_gemminc_ukernel_8x8s4__neon()
121 vacc2x0123 = vmlaq_f32(vacc2x0123, va2, vb0123c0); in xnn_f32_gemminc_ukernel_8x8s4__neon()
122 vacc3x0123 = vmlaq_f32(vacc3x0123, va3, vb0123c0); in xnn_f32_gemminc_ukernel_8x8s4__neon()
123 vacc4x0123 = vmlaq_f32(vacc4x0123, va4, vb0123c0); in xnn_f32_gemminc_ukernel_8x8s4__neon()
124 vacc5x0123 = vmlaq_f32(vacc5x0123, va5, vb0123c0); in xnn_f32_gemminc_ukernel_8x8s4__neon()
125 vacc6x0123 = vmlaq_f32(vacc6x0123, va6, vb0123c0); in xnn_f32_gemminc_ukernel_8x8s4__neon()
126 vacc7x0123 = vmlaq_f32(vacc7x0123, va7, vb0123c0); in xnn_f32_gemminc_ukernel_8x8s4__neon()
127 vacc0x4567 = vmlaq_f32(vacc0x4567, va0, vb4567c0); in xnn_f32_gemminc_ukernel_8x8s4__neon()
128 vacc1x4567 = vmlaq_f32(vacc1x4567, va1, vb4567c0); in xnn_f32_gemminc_ukernel_8x8s4__neon()
[all …]
D6x8s4-neon.c101 vacc0x0123 = vmlaq_f32(vacc0x0123, va0, vb0123c0); in xnn_f32_gemminc_ukernel_6x8s4__neon()
102 vacc1x0123 = vmlaq_f32(vacc1x0123, va1, vb0123c0); in xnn_f32_gemminc_ukernel_6x8s4__neon()
103 vacc2x0123 = vmlaq_f32(vacc2x0123, va2, vb0123c0); in xnn_f32_gemminc_ukernel_6x8s4__neon()
104 vacc3x0123 = vmlaq_f32(vacc3x0123, va3, vb0123c0); in xnn_f32_gemminc_ukernel_6x8s4__neon()
105 vacc4x0123 = vmlaq_f32(vacc4x0123, va4, vb0123c0); in xnn_f32_gemminc_ukernel_6x8s4__neon()
106 vacc5x0123 = vmlaq_f32(vacc5x0123, va5, vb0123c0); in xnn_f32_gemminc_ukernel_6x8s4__neon()
107 vacc0x4567 = vmlaq_f32(vacc0x4567, va0, vb4567c0); in xnn_f32_gemminc_ukernel_6x8s4__neon()
108 vacc1x4567 = vmlaq_f32(vacc1x4567, va1, vb4567c0); in xnn_f32_gemminc_ukernel_6x8s4__neon()
109 vacc2x4567 = vmlaq_f32(vacc2x4567, va2, vb4567c0); in xnn_f32_gemminc_ukernel_6x8s4__neon()
110 vacc3x4567 = vmlaq_f32(vacc3x4567, va3, vb4567c0); in xnn_f32_gemminc_ukernel_6x8s4__neon()
[all …]
D6x8-neon-dup-ld128.c107 vacc0x0123 = vmlaq_f32(vacc0x0123, va0c0, vb0123c0); in xnn_f32_gemminc_ukernel_6x8__neon_dup_ld128()
108 vacc1x0123 = vmlaq_f32(vacc1x0123, va1c0, vb0123c0); in xnn_f32_gemminc_ukernel_6x8__neon_dup_ld128()
109 vacc2x0123 = vmlaq_f32(vacc2x0123, va2c0, vb0123c0); in xnn_f32_gemminc_ukernel_6x8__neon_dup_ld128()
110 vacc3x0123 = vmlaq_f32(vacc3x0123, va3c0, vb0123c0); in xnn_f32_gemminc_ukernel_6x8__neon_dup_ld128()
111 vacc4x0123 = vmlaq_f32(vacc4x0123, va4c0, vb0123c0); in xnn_f32_gemminc_ukernel_6x8__neon_dup_ld128()
112 vacc5x0123 = vmlaq_f32(vacc5x0123, va5c0, vb0123c0); in xnn_f32_gemminc_ukernel_6x8__neon_dup_ld128()
113 vacc0x4567 = vmlaq_f32(vacc0x4567, va0c0, vb4567c0); in xnn_f32_gemminc_ukernel_6x8__neon_dup_ld128()
114 vacc1x4567 = vmlaq_f32(vacc1x4567, va1c0, vb4567c0); in xnn_f32_gemminc_ukernel_6x8__neon_dup_ld128()
115 vacc2x4567 = vmlaq_f32(vacc2x4567, va2c0, vb4567c0); in xnn_f32_gemminc_ukernel_6x8__neon_dup_ld128()
116 vacc3x4567 = vmlaq_f32(vacc3x4567, va3c0, vb4567c0); in xnn_f32_gemminc_ukernel_6x8__neon_dup_ld128()
[all …]
/external/XNNPACK/src/f32-igemm/gen/
D8x8s4-neon.c150 vacc0x0123 = vmlaq_f32(vacc0x0123, va0, vb0123c0); in xnn_f32_igemm_ukernel_8x8s4__neon()
151 vacc1x0123 = vmlaq_f32(vacc1x0123, va1, vb0123c0); in xnn_f32_igemm_ukernel_8x8s4__neon()
152 vacc2x0123 = vmlaq_f32(vacc2x0123, va2, vb0123c0); in xnn_f32_igemm_ukernel_8x8s4__neon()
153 vacc3x0123 = vmlaq_f32(vacc3x0123, va3, vb0123c0); in xnn_f32_igemm_ukernel_8x8s4__neon()
154 vacc4x0123 = vmlaq_f32(vacc4x0123, va4, vb0123c0); in xnn_f32_igemm_ukernel_8x8s4__neon()
155 vacc5x0123 = vmlaq_f32(vacc5x0123, va5, vb0123c0); in xnn_f32_igemm_ukernel_8x8s4__neon()
156 vacc6x0123 = vmlaq_f32(vacc6x0123, va6, vb0123c0); in xnn_f32_igemm_ukernel_8x8s4__neon()
157 vacc7x0123 = vmlaq_f32(vacc7x0123, va7, vb0123c0); in xnn_f32_igemm_ukernel_8x8s4__neon()
158 vacc0x4567 = vmlaq_f32(vacc0x4567, va0, vb4567c0); in xnn_f32_igemm_ukernel_8x8s4__neon()
159 vacc1x4567 = vmlaq_f32(vacc1x4567, va1, vb4567c0); in xnn_f32_igemm_ukernel_8x8s4__neon()
[all …]
D6x8-neon-dup-ld128.c133 vacc0x0123 = vmlaq_f32(vacc0x0123, va0c0, vb0123c0); in xnn_f32_igemm_ukernel_6x8__neon_dup_ld128()
134 vacc1x0123 = vmlaq_f32(vacc1x0123, va1c0, vb0123c0); in xnn_f32_igemm_ukernel_6x8__neon_dup_ld128()
135 vacc2x0123 = vmlaq_f32(vacc2x0123, va2c0, vb0123c0); in xnn_f32_igemm_ukernel_6x8__neon_dup_ld128()
136 vacc3x0123 = vmlaq_f32(vacc3x0123, va3c0, vb0123c0); in xnn_f32_igemm_ukernel_6x8__neon_dup_ld128()
137 vacc4x0123 = vmlaq_f32(vacc4x0123, va4c0, vb0123c0); in xnn_f32_igemm_ukernel_6x8__neon_dup_ld128()
138 vacc5x0123 = vmlaq_f32(vacc5x0123, va5c0, vb0123c0); in xnn_f32_igemm_ukernel_6x8__neon_dup_ld128()
139 vacc0x4567 = vmlaq_f32(vacc0x4567, va0c0, vb4567c0); in xnn_f32_igemm_ukernel_6x8__neon_dup_ld128()
140 vacc1x4567 = vmlaq_f32(vacc1x4567, va1c0, vb4567c0); in xnn_f32_igemm_ukernel_6x8__neon_dup_ld128()
141 vacc2x4567 = vmlaq_f32(vacc2x4567, va2c0, vb4567c0); in xnn_f32_igemm_ukernel_6x8__neon_dup_ld128()
142 vacc3x4567 = vmlaq_f32(vacc3x4567, va3c0, vb4567c0); in xnn_f32_igemm_ukernel_6x8__neon_dup_ld128()
[all …]
D6x8s4-neon.c126 vacc0x0123 = vmlaq_f32(vacc0x0123, va0, vb0123c0); in xnn_f32_igemm_ukernel_6x8s4__neon()
127 vacc1x0123 = vmlaq_f32(vacc1x0123, va1, vb0123c0); in xnn_f32_igemm_ukernel_6x8s4__neon()
128 vacc2x0123 = vmlaq_f32(vacc2x0123, va2, vb0123c0); in xnn_f32_igemm_ukernel_6x8s4__neon()
129 vacc3x0123 = vmlaq_f32(vacc3x0123, va3, vb0123c0); in xnn_f32_igemm_ukernel_6x8s4__neon()
130 vacc4x0123 = vmlaq_f32(vacc4x0123, va4, vb0123c0); in xnn_f32_igemm_ukernel_6x8s4__neon()
131 vacc5x0123 = vmlaq_f32(vacc5x0123, va5, vb0123c0); in xnn_f32_igemm_ukernel_6x8s4__neon()
132 vacc0x4567 = vmlaq_f32(vacc0x4567, va0, vb4567c0); in xnn_f32_igemm_ukernel_6x8s4__neon()
133 vacc1x4567 = vmlaq_f32(vacc1x4567, va1, vb4567c0); in xnn_f32_igemm_ukernel_6x8s4__neon()
134 vacc2x4567 = vmlaq_f32(vacc2x4567, va2, vb4567c0); in xnn_f32_igemm_ukernel_6x8s4__neon()
135 vacc3x4567 = vmlaq_f32(vacc3x4567, va3, vb4567c0); in xnn_f32_igemm_ukernel_6x8s4__neon()
[all …]
/external/XNNPACK/src/f32-gemm/gen/
D8x8s4-neon.c117 vacc0x0123 = vmlaq_f32(vacc0x0123, va0, vb0123c0); in xnn_f32_gemm_ukernel_8x8s4__neon()
118 vacc1x0123 = vmlaq_f32(vacc1x0123, va1, vb0123c0); in xnn_f32_gemm_ukernel_8x8s4__neon()
119 vacc2x0123 = vmlaq_f32(vacc2x0123, va2, vb0123c0); in xnn_f32_gemm_ukernel_8x8s4__neon()
120 vacc3x0123 = vmlaq_f32(vacc3x0123, va3, vb0123c0); in xnn_f32_gemm_ukernel_8x8s4__neon()
121 vacc4x0123 = vmlaq_f32(vacc4x0123, va4, vb0123c0); in xnn_f32_gemm_ukernel_8x8s4__neon()
122 vacc5x0123 = vmlaq_f32(vacc5x0123, va5, vb0123c0); in xnn_f32_gemm_ukernel_8x8s4__neon()
123 vacc6x0123 = vmlaq_f32(vacc6x0123, va6, vb0123c0); in xnn_f32_gemm_ukernel_8x8s4__neon()
124 vacc7x0123 = vmlaq_f32(vacc7x0123, va7, vb0123c0); in xnn_f32_gemm_ukernel_8x8s4__neon()
125 vacc0x4567 = vmlaq_f32(vacc0x4567, va0, vb4567c0); in xnn_f32_gemm_ukernel_8x8s4__neon()
126 vacc1x4567 = vmlaq_f32(vacc1x4567, va1, vb4567c0); in xnn_f32_gemm_ukernel_8x8s4__neon()
[all …]
D6x8s4-neon.c99 vacc0x0123 = vmlaq_f32(vacc0x0123, va0, vb0123c0); in xnn_f32_gemm_ukernel_6x8s4__neon()
100 vacc1x0123 = vmlaq_f32(vacc1x0123, va1, vb0123c0); in xnn_f32_gemm_ukernel_6x8s4__neon()
101 vacc2x0123 = vmlaq_f32(vacc2x0123, va2, vb0123c0); in xnn_f32_gemm_ukernel_6x8s4__neon()
102 vacc3x0123 = vmlaq_f32(vacc3x0123, va3, vb0123c0); in xnn_f32_gemm_ukernel_6x8s4__neon()
103 vacc4x0123 = vmlaq_f32(vacc4x0123, va4, vb0123c0); in xnn_f32_gemm_ukernel_6x8s4__neon()
104 vacc5x0123 = vmlaq_f32(vacc5x0123, va5, vb0123c0); in xnn_f32_gemm_ukernel_6x8s4__neon()
105 vacc0x4567 = vmlaq_f32(vacc0x4567, va0, vb4567c0); in xnn_f32_gemm_ukernel_6x8s4__neon()
106 vacc1x4567 = vmlaq_f32(vacc1x4567, va1, vb4567c0); in xnn_f32_gemm_ukernel_6x8s4__neon()
107 vacc2x4567 = vmlaq_f32(vacc2x4567, va2, vb4567c0); in xnn_f32_gemm_ukernel_6x8s4__neon()
108 vacc3x4567 = vmlaq_f32(vacc3x4567, va3, vb4567c0); in xnn_f32_gemm_ukernel_6x8s4__neon()
[all …]
D6x8-neon-dup-ld128.c105 vacc0x0123 = vmlaq_f32(vacc0x0123, va0c0, vb0123c0); in xnn_f32_gemm_ukernel_6x8__neon_dup_ld128()
106 vacc1x0123 = vmlaq_f32(vacc1x0123, va1c0, vb0123c0); in xnn_f32_gemm_ukernel_6x8__neon_dup_ld128()
107 vacc2x0123 = vmlaq_f32(vacc2x0123, va2c0, vb0123c0); in xnn_f32_gemm_ukernel_6x8__neon_dup_ld128()
108 vacc3x0123 = vmlaq_f32(vacc3x0123, va3c0, vb0123c0); in xnn_f32_gemm_ukernel_6x8__neon_dup_ld128()
109 vacc4x0123 = vmlaq_f32(vacc4x0123, va4c0, vb0123c0); in xnn_f32_gemm_ukernel_6x8__neon_dup_ld128()
110 vacc5x0123 = vmlaq_f32(vacc5x0123, va5c0, vb0123c0); in xnn_f32_gemm_ukernel_6x8__neon_dup_ld128()
111 vacc0x4567 = vmlaq_f32(vacc0x4567, va0c0, vb4567c0); in xnn_f32_gemm_ukernel_6x8__neon_dup_ld128()
112 vacc1x4567 = vmlaq_f32(vacc1x4567, va1c0, vb4567c0); in xnn_f32_gemm_ukernel_6x8__neon_dup_ld128()
113 vacc2x4567 = vmlaq_f32(vacc2x4567, va2c0, vb4567c0); in xnn_f32_gemm_ukernel_6x8__neon_dup_ld128()
114 vacc3x4567 = vmlaq_f32(vacc3x4567, va3c0, vb4567c0); in xnn_f32_gemm_ukernel_6x8__neon_dup_ld128()
[all …]

123456