Home
last modified time | relevance | path

Searched refs:vxb89ABCDEFc0 (Results 1 – 25 of 86) sorted by relevance

1234

/external/XNNPACK/src/qs8-gemm/gen/
D3x16-minmax-rndnu-neon-mlal-lane-prfm.c87 const int16x8_t vxb89ABCDEFc0 = vmovl_s8(vb89ABCDEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm() local
89 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm()
90 vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm()
91 vacc1x89AB = vmlal_lane_s16(vacc1x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm()
92 vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm()
93 vacc2x89AB = vmlal_lane_s16(vacc2x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa2), 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm()
94 vacc2xCDEF = vmlal_lane_s16(vacc2xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa2), 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm()
239 const int16x8_t vxb89ABCDEFc0 = vmovl_s8(vb89ABCDEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm() local
243 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm()
244 vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm()
[all …]
D3x16-minmax-rndnu-neon-mlal-lane.c87 const int16x8_t vxb89ABCDEFc0 = vmovl_s8(vb89ABCDEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane() local
89 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane()
90 vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane()
91 vacc1x89AB = vmlal_lane_s16(vacc1x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane()
92 vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane()
93 vacc2x89AB = vmlal_lane_s16(vacc2x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa2), 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane()
94 vacc2xCDEF = vmlal_lane_s16(vacc2xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa2), 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane()
237 const int16x8_t vxb89ABCDEFc0 = vmovl_s8(vb89ABCDEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane() local
241 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane()
242 vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane()
[all …]
D6x16-minmax-rndnu-neon-mlal-lane-prfm.c129 const int16x8_t vxb89ABCDEFc0 = vmovl_s8(vb89ABCDEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm() local
131 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm()
132 vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm()
133 vacc1x89AB = vmlal_lane_s16(vacc1x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm()
134 vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm()
135 vacc2x89AB = vmlal_lane_s16(vacc2x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa2), 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm()
136 vacc2xCDEF = vmlal_lane_s16(vacc2xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa2), 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm()
137 vacc3x89AB = vmlal_lane_s16(vacc3x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm()
138 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm()
139 vacc4x89AB = vmlal_lane_s16(vacc4x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa4), 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm()
[all …]
D6x16-minmax-rndnu-neon-mlal-lane.c129 const int16x8_t vxb89ABCDEFc0 = vmovl_s8(vb89ABCDEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane() local
131 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
132 vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
133 vacc1x89AB = vmlal_lane_s16(vacc1x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
134 vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
135 vacc2x89AB = vmlal_lane_s16(vacc2x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa2), 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
136 vacc2xCDEF = vmlal_lane_s16(vacc2xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa2), 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
137 vacc3x89AB = vmlal_lane_s16(vacc3x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
138 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
139 vacc4x89AB = vmlal_lane_s16(vacc4x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa4), 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
[all …]
D4x16-minmax-fp32-neonv8-mlal-lane.c102 const int16x8_t vxb89ABCDEFc0 = vmovl_s8(vb89ABCDEFc0); in xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane() local
104 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
105 vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
106 vacc1x89AB = vmlal_lane_s16(vacc1x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
107 vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
108 vacc2x89AB = vmlal_lane_s16(vacc2x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa2), 0); in xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
109 vacc2xCDEF = vmlal_lane_s16(vacc2xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa2), 0); in xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
110 vacc3x89AB = vmlal_lane_s16(vacc3x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
111 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
284 const int16x8_t vxb89ABCDEFc0 = vmovl_s8(vb89ABCDEFc0); in xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane() local
[all …]
D4x16-minmax-rndnu-neon-mlal-lane.c101 const int16x8_t vxb89ABCDEFc0 = vmovl_s8(vb89ABCDEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane() local
103 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
104 vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
105 vacc1x89AB = vmlal_lane_s16(vacc1x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
106 vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
107 vacc2x89AB = vmlal_lane_s16(vacc2x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa2), 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
108 vacc2xCDEF = vmlal_lane_s16(vacc2xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa2), 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
109 vacc3x89AB = vmlal_lane_s16(vacc3x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
110 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
283 const int16x8_t vxb89ABCDEFc0 = vmovl_s8(vb89ABCDEFc0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane() local
[all …]
/external/XNNPACK/src/qc8-gemm/gen/
D3x16-minmax-fp32-neonv8-mlal-lane-prfm.c88 const int16x8_t vxb89ABCDEFc0 = vmovl_s8(vb89ABCDEFc0); in xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm() local
90 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm()
91 vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm()
92 vacc1x89AB = vmlal_lane_s16(vacc1x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm()
93 vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm()
94 vacc2x89AB = vmlal_lane_s16(vacc2x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa2), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm()
95 vacc2xCDEF = vmlal_lane_s16(vacc2xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa2), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm()
240 const int16x8_t vxb89ABCDEFc0 = vmovl_s8(vb89ABCDEFc0); in xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm() local
244 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm()
245 vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm()
[all …]
D3x16-minmax-fp32-neonv8-mlal-lane.c88 const int16x8_t vxb89ABCDEFc0 = vmovl_s8(vb89ABCDEFc0); in xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane() local
90 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane()
91 vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane()
92 vacc1x89AB = vmlal_lane_s16(vacc1x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane()
93 vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane()
94 vacc2x89AB = vmlal_lane_s16(vacc2x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa2), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane()
95 vacc2xCDEF = vmlal_lane_s16(vacc2xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa2), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane()
238 const int16x8_t vxb89ABCDEFc0 = vmovl_s8(vb89ABCDEFc0); in xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane() local
242 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane()
243 vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane()
[all …]
D6x16-minmax-fp32-neon-mlal-lane-prfm.c129 const int16x8_t vxb89ABCDEFc0 = vmovl_s8(vb89ABCDEFc0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm() local
131 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm()
132 vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm()
133 vacc1x89AB = vmlal_lane_s16(vacc1x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm()
134 vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm()
135 vacc2x89AB = vmlal_lane_s16(vacc2x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa2), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm()
136 vacc2xCDEF = vmlal_lane_s16(vacc2xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa2), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm()
137 vacc3x89AB = vmlal_lane_s16(vacc3x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm()
138 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm()
139 vacc4x89AB = vmlal_lane_s16(vacc4x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa4), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm()
[all …]
D6x16-minmax-fp32-neonv8-mlal-lane-prfm.c130 const int16x8_t vxb89ABCDEFc0 = vmovl_s8(vb89ABCDEFc0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm() local
132 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm()
133 vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm()
134 vacc1x89AB = vmlal_lane_s16(vacc1x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm()
135 vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm()
136 vacc2x89AB = vmlal_lane_s16(vacc2x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa2), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm()
137 vacc2xCDEF = vmlal_lane_s16(vacc2xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa2), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm()
138 vacc3x89AB = vmlal_lane_s16(vacc3x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm()
139 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm()
140 vacc4x89AB = vmlal_lane_s16(vacc4x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa4), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm()
[all …]
D6x16-minmax-fp32-neonv8-mlal-lane.c130 const int16x8_t vxb89ABCDEFc0 = vmovl_s8(vb89ABCDEFc0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane() local
132 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane()
133 vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane()
134 vacc1x89AB = vmlal_lane_s16(vacc1x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane()
135 vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane()
136 vacc2x89AB = vmlal_lane_s16(vacc2x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa2), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane()
137 vacc2xCDEF = vmlal_lane_s16(vacc2xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa2), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane()
138 vacc3x89AB = vmlal_lane_s16(vacc3x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane()
139 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane()
140 vacc4x89AB = vmlal_lane_s16(vacc4x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa4), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane()
[all …]
D6x16-minmax-fp32-neon-mlal-lane.c129 const int16x8_t vxb89ABCDEFc0 = vmovl_s8(vb89ABCDEFc0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane() local
131 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane()
132 vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane()
133 vacc1x89AB = vmlal_lane_s16(vacc1x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane()
134 vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane()
135 vacc2x89AB = vmlal_lane_s16(vacc2x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa2), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane()
136 vacc2xCDEF = vmlal_lane_s16(vacc2xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa2), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane()
137 vacc3x89AB = vmlal_lane_s16(vacc3x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane()
138 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane()
139 vacc4x89AB = vmlal_lane_s16(vacc4x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa4), 0); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane()
[all …]
/external/XNNPACK/src/qc8-igemm/gen/
D6x16-minmax-fp32-neon-mlal-lane-prfm.c150 const int16x8_t vxb89ABCDEFc0 = vmovl_s8(vb89ABCDEFc0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm() local
152 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm()
153 … vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm()
154 vacc1x89AB = vmlal_lane_s16(vacc1x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm()
155 … vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm()
156 vacc2x89AB = vmlal_lane_s16(vacc2x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa2), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm()
157 … vacc2xCDEF = vmlal_lane_s16(vacc2xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa2), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm()
158 vacc3x89AB = vmlal_lane_s16(vacc3x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm()
159 … vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm()
160 vacc4x89AB = vmlal_lane_s16(vacc4x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa4), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm()
[all …]
D6x16-minmax-fp32-neonv8-mlal-lane.c151 const int16x8_t vxb89ABCDEFc0 = vmovl_s8(vb89ABCDEFc0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane() local
153 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane()
154 … vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane()
155 vacc1x89AB = vmlal_lane_s16(vacc1x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane()
156 … vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane()
157 vacc2x89AB = vmlal_lane_s16(vacc2x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa2), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane()
158 … vacc2xCDEF = vmlal_lane_s16(vacc2xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa2), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane()
159 vacc3x89AB = vmlal_lane_s16(vacc3x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane()
160 … vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane()
161 vacc4x89AB = vmlal_lane_s16(vacc4x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa4), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane()
[all …]
D6x16-minmax-fp32-neonv8-mlal-lane-prfm.c151 const int16x8_t vxb89ABCDEFc0 = vmovl_s8(vb89ABCDEFc0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm() local
153 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm()
154 … vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm()
155 vacc1x89AB = vmlal_lane_s16(vacc1x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm()
156 … vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm()
157 vacc2x89AB = vmlal_lane_s16(vacc2x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa2), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm()
158 … vacc2xCDEF = vmlal_lane_s16(vacc2xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa2), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm()
159 vacc3x89AB = vmlal_lane_s16(vacc3x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm()
160 … vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm()
161 vacc4x89AB = vmlal_lane_s16(vacc4x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa4), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm()
[all …]
D6x16-minmax-fp32-neon-mlal-lane.c150 const int16x8_t vxb89ABCDEFc0 = vmovl_s8(vb89ABCDEFc0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane() local
152 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane()
153 … vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane()
154 vacc1x89AB = vmlal_lane_s16(vacc1x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane()
155 … vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane()
156 vacc2x89AB = vmlal_lane_s16(vacc2x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa2), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane()
157 … vacc2xCDEF = vmlal_lane_s16(vacc2xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa2), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane()
158 vacc3x89AB = vmlal_lane_s16(vacc3x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane()
159 … vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane()
160 vacc4x89AB = vmlal_lane_s16(vacc4x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa4), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane()
[all …]
D4x16-minmax-fp32-neon-mlal-lane.c118 const int16x8_t vxb89ABCDEFc0 = vmovl_s8(vb89ABCDEFc0); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane() local
120 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
121 … vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
122 vacc1x89AB = vmlal_lane_s16(vacc1x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
123 … vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
124 vacc2x89AB = vmlal_lane_s16(vacc2x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa2), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
125 … vacc2xCDEF = vmlal_lane_s16(vacc2xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa2), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
126 vacc3x89AB = vmlal_lane_s16(vacc3x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
127 … vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
300 const int16x8_t vxb89ABCDEFc0 = vmovl_s8(vb89ABCDEFc0); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane() local
[all …]
/external/XNNPACK/src/qs8-igemm/gen/
D6x16-minmax-rndnu-neon-mlal-lane-prfm.c150 const int16x8_t vxb89ABCDEFc0 = vmovl_s8(vb89ABCDEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm() local
152 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm()
153 … vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm()
154 vacc1x89AB = vmlal_lane_s16(vacc1x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm()
155 … vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm()
156 vacc2x89AB = vmlal_lane_s16(vacc2x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa2), 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm()
157 … vacc2xCDEF = vmlal_lane_s16(vacc2xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa2), 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm()
158 vacc3x89AB = vmlal_lane_s16(vacc3x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm()
159 … vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm()
160 vacc4x89AB = vmlal_lane_s16(vacc4x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa4), 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm()
[all …]
D6x16-minmax-rndnu-neon-mlal-lane.c150 const int16x8_t vxb89ABCDEFc0 = vmovl_s8(vb89ABCDEFc0); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane() local
152 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
153 … vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
154 vacc1x89AB = vmlal_lane_s16(vacc1x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
155 … vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
156 vacc2x89AB = vmlal_lane_s16(vacc2x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa2), 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
157 … vacc2xCDEF = vmlal_lane_s16(vacc2xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa2), 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
158 vacc3x89AB = vmlal_lane_s16(vacc3x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
159 … vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
160 vacc4x89AB = vmlal_lane_s16(vacc4x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa4), 0); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
[all …]
D4x16-minmax-fp32-neon-mlal-lane.c118 const int16x8_t vxb89ABCDEFc0 = vmovl_s8(vb89ABCDEFc0); in xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane() local
120 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
121 … vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
122 vacc1x89AB = vmlal_lane_s16(vacc1x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
123 … vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
124 vacc2x89AB = vmlal_lane_s16(vacc2x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa2), 0); in xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
125 … vacc2xCDEF = vmlal_lane_s16(vacc2xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa2), 0); in xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
126 vacc3x89AB = vmlal_lane_s16(vacc3x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
127 … vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
300 const int16x8_t vxb89ABCDEFc0 = vmovl_s8(vb89ABCDEFc0); in xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane() local
[all …]
/external/XNNPACK/src/qu8-gemm/gen/
D6x16-minmax-rndnu-neon-mlal-lane.c130 const int16x8_t vxb89ABCDEFc0 = vreinterpretq_s16_u16(vsubl_u8(vb89ABCDEFc0, vb_zero_point)); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane() local
132 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
133 vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
134 vacc1x89AB = vmlal_lane_s16(vacc1x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
135 vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
136 vacc2x89AB = vmlal_lane_s16(vacc2x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa2), 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
137 vacc2xCDEF = vmlal_lane_s16(vacc2xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa2), 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
138 vacc3x89AB = vmlal_lane_s16(vacc3x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
139 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
140 vacc4x89AB = vmlal_lane_s16(vacc4x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa4), 0); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
[all …]
D4x16-minmax-fp32-neon-mlal-lane.c102 const int16x8_t vxb89ABCDEFc0 = vreinterpretq_s16_u16(vsubl_u8(vb89ABCDEFc0, vb_zero_point)); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane() local
104 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
105 vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
106 vacc1x89AB = vmlal_lane_s16(vacc1x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
107 vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
108 vacc2x89AB = vmlal_lane_s16(vacc2x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa2), 0); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
109 vacc2xCDEF = vmlal_lane_s16(vacc2xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa2), 0); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
110 vacc3x89AB = vmlal_lane_s16(vacc3x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
111 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
284 const int16x8_t vxb89ABCDEFc0 = vreinterpretq_s16_u16(vsubl_u8(vb89ABCDEFc0, vb_zero_point)); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane() local
[all …]
D4x16-minmax-fp32-neonv8-mlal-lane.c103 const int16x8_t vxb89ABCDEFc0 = vreinterpretq_s16_u16(vsubl_u8(vb89ABCDEFc0, vb_zero_point)); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane() local
105 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
106 vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
107 vacc1x89AB = vmlal_lane_s16(vacc1x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
108 vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
109 vacc2x89AB = vmlal_lane_s16(vacc2x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa2), 0); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
110 vacc2xCDEF = vmlal_lane_s16(vacc2xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa2), 0); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
111 vacc3x89AB = vmlal_lane_s16(vacc3x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
112 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
285 const int16x8_t vxb89ABCDEFc0 = vreinterpretq_s16_u16(vsubl_u8(vb89ABCDEFc0, vb_zero_point)); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane() local
[all …]
/external/XNNPACK/src/qu8-igemm/gen/
D6x16-minmax-rndnu-neon-mlal-lane.c151 … const int16x8_t vxb89ABCDEFc0 = vreinterpretq_s16_u16(vsubl_u8(vb89ABCDEFc0, vb_zero_point)); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane() local
153 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
154 … vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
155 vacc1x89AB = vmlal_lane_s16(vacc1x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
156 … vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
157 vacc2x89AB = vmlal_lane_s16(vacc2x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa2), 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
158 … vacc2xCDEF = vmlal_lane_s16(vacc2xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa2), 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
159 vacc3x89AB = vmlal_lane_s16(vacc3x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
160 … vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
161 vacc4x89AB = vmlal_lane_s16(vacc4x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa4), 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
[all …]
D4x16-minmax-rndnu-neon-mlal-lane.c119 … const int16x8_t vxb89ABCDEFc0 = vreinterpretq_s16_u16(vsubl_u8(vb89ABCDEFc0, vb_zero_point)); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane() local
121 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
122 … vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
123 vacc1x89AB = vmlal_lane_s16(vacc1x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
124 … vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
125 vacc2x89AB = vmlal_lane_s16(vacc2x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa2), 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
126 … vacc2xCDEF = vmlal_lane_s16(vacc2xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa2), 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
127 vacc3x89AB = vmlal_lane_s16(vacc3x89AB, vget_low_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
128 … vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
301 … const int16x8_t vxb89ABCDEFc0 = vreinterpretq_s16_u16(vsubl_u8(vb89ABCDEFc0, vb_zero_point)); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane() local
[all …]

1234