Home
last modified time | relevance | path

Searched refs:vxb89ABCDEFc1 (Results 1 – 25 of 86) sorted by relevance

1234

/external/XNNPACK/src/qs8-gemm/gen/
D3x16-minmax-rndnu-neon-mlal-lane-prfm.c105 const int16x8_t vxb89ABCDEFc1 = vmovl_s8(vb89ABCDEFc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm() local
107 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm()
108 vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm()
109 vacc1x89AB = vmlal_lane_s16(vacc1x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa1), 1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm()
110 vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa1), 1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm()
111 vacc2x89AB = vmlal_lane_s16(vacc2x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa2), 1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm()
112 vacc2xCDEF = vmlal_lane_s16(vacc2xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa2), 1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm()
258 const int16x8_t vxb89ABCDEFc1 = vmovl_s8(vb89ABCDEFc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm() local
262 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm()
263 … vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm()
[all …]
D3x16-minmax-rndnu-neon-mlal-lane.c105 const int16x8_t vxb89ABCDEFc1 = vmovl_s8(vb89ABCDEFc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane() local
107 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane()
108 vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane()
109 vacc1x89AB = vmlal_lane_s16(vacc1x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa1), 1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane()
110 vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa1), 1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane()
111 vacc2x89AB = vmlal_lane_s16(vacc2x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa2), 1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane()
112 vacc2xCDEF = vmlal_lane_s16(vacc2xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa2), 1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane()
256 const int16x8_t vxb89ABCDEFc1 = vmovl_s8(vb89ABCDEFc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane() local
260 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane()
261 … vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane()
[all …]
D6x16-minmax-rndnu-neon-mlal-lane-prfm.c159 const int16x8_t vxb89ABCDEFc1 = vmovl_s8(vb89ABCDEFc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm() local
161 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm()
162 vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm()
163 vacc1x89AB = vmlal_lane_s16(vacc1x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa1), 1); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm()
164 vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa1), 1); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm()
165 vacc2x89AB = vmlal_lane_s16(vacc2x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa2), 1); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm()
166 vacc2xCDEF = vmlal_lane_s16(vacc2xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa2), 1); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm()
167 vacc3x89AB = vmlal_lane_s16(vacc3x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm()
168 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm()
169 vacc4x89AB = vmlal_lane_s16(vacc4x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa4), 1); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm()
[all …]
D6x16-minmax-rndnu-neon-mlal-lane.c159 const int16x8_t vxb89ABCDEFc1 = vmovl_s8(vb89ABCDEFc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane() local
161 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
162 vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
163 vacc1x89AB = vmlal_lane_s16(vacc1x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa1), 1); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
164 vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa1), 1); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
165 vacc2x89AB = vmlal_lane_s16(vacc2x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa2), 1); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
166 vacc2xCDEF = vmlal_lane_s16(vacc2xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa2), 1); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
167 vacc3x89AB = vmlal_lane_s16(vacc3x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
168 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
169 vacc4x89AB = vmlal_lane_s16(vacc4x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa4), 1); in xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
[all …]
D4x16-minmax-fp32-neonv8-mlal-lane.c124 const int16x8_t vxb89ABCDEFc1 = vmovl_s8(vb89ABCDEFc1); in xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane() local
126 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
127 vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
128 vacc1x89AB = vmlal_lane_s16(vacc1x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa1), 1); in xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
129 vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa1), 1); in xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
130 vacc2x89AB = vmlal_lane_s16(vacc2x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa2), 1); in xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
131 vacc2xCDEF = vmlal_lane_s16(vacc2xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa2), 1); in xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
132 vacc3x89AB = vmlal_lane_s16(vacc3x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
133 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
307 const int16x8_t vxb89ABCDEFc1 = vmovl_s8(vb89ABCDEFc1); in xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane() local
[all …]
D4x16-minmax-rndnu-neon-mlal-lane.c123 const int16x8_t vxb89ABCDEFc1 = vmovl_s8(vb89ABCDEFc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane() local
125 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
126 vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
127 vacc1x89AB = vmlal_lane_s16(vacc1x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa1), 1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
128 vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa1), 1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
129 vacc2x89AB = vmlal_lane_s16(vacc2x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa2), 1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
130 vacc2xCDEF = vmlal_lane_s16(vacc2xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa2), 1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
131 vacc3x89AB = vmlal_lane_s16(vacc3x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
132 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
306 const int16x8_t vxb89ABCDEFc1 = vmovl_s8(vb89ABCDEFc1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane() local
[all …]
/external/XNNPACK/src/qc8-gemm/gen/
D3x16-minmax-fp32-neonv8-mlal-lane-prfm.c106 const int16x8_t vxb89ABCDEFc1 = vmovl_s8(vb89ABCDEFc1); in xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm() local
108 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm()
109 vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm()
110 vacc1x89AB = vmlal_lane_s16(vacc1x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa1), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm()
111 vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa1), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm()
112 vacc2x89AB = vmlal_lane_s16(vacc2x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa2), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm()
113 vacc2xCDEF = vmlal_lane_s16(vacc2xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa2), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm()
259 const int16x8_t vxb89ABCDEFc1 = vmovl_s8(vb89ABCDEFc1); in xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm() local
263 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm()
264 … vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm()
[all …]
D3x16-minmax-fp32-neonv8-mlal-lane.c106 const int16x8_t vxb89ABCDEFc1 = vmovl_s8(vb89ABCDEFc1); in xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane() local
108 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane()
109 vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane()
110 vacc1x89AB = vmlal_lane_s16(vacc1x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa1), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane()
111 vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa1), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane()
112 vacc2x89AB = vmlal_lane_s16(vacc2x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa2), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane()
113 vacc2xCDEF = vmlal_lane_s16(vacc2xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa2), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane()
257 const int16x8_t vxb89ABCDEFc1 = vmovl_s8(vb89ABCDEFc1); in xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane() local
261 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane()
262 … vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane()
[all …]
D6x16-minmax-fp32-neon-mlal-lane-prfm.c159 const int16x8_t vxb89ABCDEFc1 = vmovl_s8(vb89ABCDEFc1); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm() local
161 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm()
162 vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm()
163 vacc1x89AB = vmlal_lane_s16(vacc1x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa1), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm()
164 vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa1), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm()
165 vacc2x89AB = vmlal_lane_s16(vacc2x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa2), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm()
166 vacc2xCDEF = vmlal_lane_s16(vacc2xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa2), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm()
167 vacc3x89AB = vmlal_lane_s16(vacc3x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm()
168 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm()
169 vacc4x89AB = vmlal_lane_s16(vacc4x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa4), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm()
[all …]
D6x16-minmax-fp32-neonv8-mlal-lane-prfm.c160 const int16x8_t vxb89ABCDEFc1 = vmovl_s8(vb89ABCDEFc1); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm() local
162 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm()
163 vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm()
164 vacc1x89AB = vmlal_lane_s16(vacc1x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa1), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm()
165 vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa1), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm()
166 vacc2x89AB = vmlal_lane_s16(vacc2x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa2), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm()
167 vacc2xCDEF = vmlal_lane_s16(vacc2xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa2), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm()
168 vacc3x89AB = vmlal_lane_s16(vacc3x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm()
169 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm()
170 vacc4x89AB = vmlal_lane_s16(vacc4x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa4), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm()
[all …]
D6x16-minmax-fp32-neonv8-mlal-lane.c160 const int16x8_t vxb89ABCDEFc1 = vmovl_s8(vb89ABCDEFc1); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane() local
162 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane()
163 vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane()
164 vacc1x89AB = vmlal_lane_s16(vacc1x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa1), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane()
165 vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa1), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane()
166 vacc2x89AB = vmlal_lane_s16(vacc2x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa2), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane()
167 vacc2xCDEF = vmlal_lane_s16(vacc2xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa2), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane()
168 vacc3x89AB = vmlal_lane_s16(vacc3x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane()
169 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane()
170 vacc4x89AB = vmlal_lane_s16(vacc4x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa4), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane()
[all …]
D6x16-minmax-fp32-neon-mlal-lane.c159 const int16x8_t vxb89ABCDEFc1 = vmovl_s8(vb89ABCDEFc1); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane() local
161 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane()
162 vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane()
163 vacc1x89AB = vmlal_lane_s16(vacc1x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa1), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane()
164 vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa1), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane()
165 vacc2x89AB = vmlal_lane_s16(vacc2x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa2), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane()
166 vacc2xCDEF = vmlal_lane_s16(vacc2xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa2), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane()
167 vacc3x89AB = vmlal_lane_s16(vacc3x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane()
168 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane()
169 vacc4x89AB = vmlal_lane_s16(vacc4x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa4), 1); in xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane()
[all …]
/external/XNNPACK/src/qc8-igemm/gen/
D6x16-minmax-fp32-neon-mlal-lane-prfm.c180 const int16x8_t vxb89ABCDEFc1 = vmovl_s8(vb89ABCDEFc1); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm() local
182 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm()
183 … vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm()
184 vacc1x89AB = vmlal_lane_s16(vacc1x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa1), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm()
185 … vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa1), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm()
186 vacc2x89AB = vmlal_lane_s16(vacc2x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa2), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm()
187 … vacc2xCDEF = vmlal_lane_s16(vacc2xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa2), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm()
188 vacc3x89AB = vmlal_lane_s16(vacc3x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm()
189 … vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm()
190 vacc4x89AB = vmlal_lane_s16(vacc4x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa4), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm()
[all …]
D6x16-minmax-fp32-neonv8-mlal-lane.c181 const int16x8_t vxb89ABCDEFc1 = vmovl_s8(vb89ABCDEFc1); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane() local
183 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane()
184 … vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane()
185 vacc1x89AB = vmlal_lane_s16(vacc1x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa1), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane()
186 … vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa1), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane()
187 vacc2x89AB = vmlal_lane_s16(vacc2x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa2), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane()
188 … vacc2xCDEF = vmlal_lane_s16(vacc2xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa2), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane()
189 vacc3x89AB = vmlal_lane_s16(vacc3x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane()
190 … vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane()
191 vacc4x89AB = vmlal_lane_s16(vacc4x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa4), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane()
[all …]
D6x16-minmax-fp32-neonv8-mlal-lane-prfm.c181 const int16x8_t vxb89ABCDEFc1 = vmovl_s8(vb89ABCDEFc1); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm() local
183 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm()
184 … vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm()
185 vacc1x89AB = vmlal_lane_s16(vacc1x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa1), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm()
186 … vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa1), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm()
187 vacc2x89AB = vmlal_lane_s16(vacc2x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa2), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm()
188 … vacc2xCDEF = vmlal_lane_s16(vacc2xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa2), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm()
189 vacc3x89AB = vmlal_lane_s16(vacc3x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm()
190 … vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm()
191 vacc4x89AB = vmlal_lane_s16(vacc4x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa4), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm()
[all …]
D6x16-minmax-fp32-neon-mlal-lane.c180 const int16x8_t vxb89ABCDEFc1 = vmovl_s8(vb89ABCDEFc1); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane() local
182 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane()
183 … vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane()
184 vacc1x89AB = vmlal_lane_s16(vacc1x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa1), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane()
185 … vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa1), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane()
186 vacc2x89AB = vmlal_lane_s16(vacc2x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa2), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane()
187 … vacc2xCDEF = vmlal_lane_s16(vacc2xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa2), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane()
188 vacc3x89AB = vmlal_lane_s16(vacc3x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane()
189 … vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane()
190 vacc4x89AB = vmlal_lane_s16(vacc4x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa4), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane()
[all …]
D4x16-minmax-fp32-neon-mlal-lane.c140 const int16x8_t vxb89ABCDEFc1 = vmovl_s8(vb89ABCDEFc1); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane() local
142 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
143 … vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
144 vacc1x89AB = vmlal_lane_s16(vacc1x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa1), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
145 … vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa1), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
146 vacc2x89AB = vmlal_lane_s16(vacc2x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa2), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
147 … vacc2xCDEF = vmlal_lane_s16(vacc2xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa2), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
148 vacc3x89AB = vmlal_lane_s16(vacc3x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
149 … vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
323 const int16x8_t vxb89ABCDEFc1 = vmovl_s8(vb89ABCDEFc1); in xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane() local
[all …]
/external/XNNPACK/src/qs8-igemm/gen/
D6x16-minmax-rndnu-neon-mlal-lane-prfm.c180 const int16x8_t vxb89ABCDEFc1 = vmovl_s8(vb89ABCDEFc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm() local
182 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm()
183 … vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm()
184 vacc1x89AB = vmlal_lane_s16(vacc1x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa1), 1); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm()
185 … vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa1), 1); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm()
186 vacc2x89AB = vmlal_lane_s16(vacc2x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa2), 1); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm()
187 … vacc2xCDEF = vmlal_lane_s16(vacc2xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa2), 1); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm()
188 vacc3x89AB = vmlal_lane_s16(vacc3x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm()
189 … vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm()
190 vacc4x89AB = vmlal_lane_s16(vacc4x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa4), 1); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm()
[all …]
D6x16-minmax-rndnu-neon-mlal-lane.c180 const int16x8_t vxb89ABCDEFc1 = vmovl_s8(vb89ABCDEFc1); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane() local
182 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
183 … vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
184 vacc1x89AB = vmlal_lane_s16(vacc1x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa1), 1); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
185 … vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa1), 1); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
186 vacc2x89AB = vmlal_lane_s16(vacc2x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa2), 1); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
187 … vacc2xCDEF = vmlal_lane_s16(vacc2xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa2), 1); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
188 vacc3x89AB = vmlal_lane_s16(vacc3x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
189 … vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
190 vacc4x89AB = vmlal_lane_s16(vacc4x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa4), 1); in xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
[all …]
D4x16-minmax-fp32-neon-mlal-lane.c140 const int16x8_t vxb89ABCDEFc1 = vmovl_s8(vb89ABCDEFc1); in xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane() local
142 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
143 … vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
144 vacc1x89AB = vmlal_lane_s16(vacc1x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa1), 1); in xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
145 … vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa1), 1); in xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
146 vacc2x89AB = vmlal_lane_s16(vacc2x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa2), 1); in xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
147 … vacc2xCDEF = vmlal_lane_s16(vacc2xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa2), 1); in xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
148 vacc3x89AB = vmlal_lane_s16(vacc3x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
149 … vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
323 const int16x8_t vxb89ABCDEFc1 = vmovl_s8(vb89ABCDEFc1); in xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane() local
[all …]
/external/XNNPACK/src/qu8-gemm/gen/
D6x16-minmax-rndnu-neon-mlal-lane.c160 const int16x8_t vxb89ABCDEFc1 = vreinterpretq_s16_u16(vsubl_u8(vb89ABCDEFc1, vb_zero_point)); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane() local
162 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
163 vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
164 vacc1x89AB = vmlal_lane_s16(vacc1x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa1), 1); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
165 vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa1), 1); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
166 vacc2x89AB = vmlal_lane_s16(vacc2x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa2), 1); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
167 vacc2xCDEF = vmlal_lane_s16(vacc2xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa2), 1); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
168 vacc3x89AB = vmlal_lane_s16(vacc3x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
169 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
170 vacc4x89AB = vmlal_lane_s16(vacc4x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa4), 1); in xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
[all …]
D4x16-minmax-fp32-neon-mlal-lane.c124 const int16x8_t vxb89ABCDEFc1 = vreinterpretq_s16_u16(vsubl_u8(vb89ABCDEFc1, vb_zero_point)); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane() local
126 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
127 vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
128 vacc1x89AB = vmlal_lane_s16(vacc1x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa1), 1); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
129 vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa1), 1); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
130 vacc2x89AB = vmlal_lane_s16(vacc2x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa2), 1); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
131 vacc2xCDEF = vmlal_lane_s16(vacc2xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa2), 1); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
132 vacc3x89AB = vmlal_lane_s16(vacc3x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
133 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane()
307 … const int16x8_t vxb89ABCDEFc1 = vreinterpretq_s16_u16(vsubl_u8(vb89ABCDEFc1, vb_zero_point)); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane() local
[all …]
D4x16-minmax-fp32-neonv8-mlal-lane.c125 const int16x8_t vxb89ABCDEFc1 = vreinterpretq_s16_u16(vsubl_u8(vb89ABCDEFc1, vb_zero_point)); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane() local
127 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
128 vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
129 vacc1x89AB = vmlal_lane_s16(vacc1x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa1), 1); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
130 vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa1), 1); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
131 vacc2x89AB = vmlal_lane_s16(vacc2x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa2), 1); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
132 vacc2xCDEF = vmlal_lane_s16(vacc2xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa2), 1); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
133 vacc3x89AB = vmlal_lane_s16(vacc3x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
134 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane()
308 … const int16x8_t vxb89ABCDEFc1 = vreinterpretq_s16_u16(vsubl_u8(vb89ABCDEFc1, vb_zero_point)); in xnn_qu8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane() local
[all …]
/external/XNNPACK/src/qu8-igemm/gen/
D6x16-minmax-rndnu-neon-mlal-lane.c181 … const int16x8_t vxb89ABCDEFc1 = vreinterpretq_s16_u16(vsubl_u8(vb89ABCDEFc1, vb_zero_point)); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane() local
183 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
184 … vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
185 vacc1x89AB = vmlal_lane_s16(vacc1x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa1), 1); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
186 … vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa1), 1); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
187 vacc2x89AB = vmlal_lane_s16(vacc2x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa2), 1); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
188 … vacc2xCDEF = vmlal_lane_s16(vacc2xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa2), 1); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
189 vacc3x89AB = vmlal_lane_s16(vacc3x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
190 … vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
191 vacc4x89AB = vmlal_lane_s16(vacc4x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa4), 1); in xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane()
[all …]
D4x16-minmax-rndnu-neon-mlal-lane.c141 … const int16x8_t vxb89ABCDEFc1 = vreinterpretq_s16_u16(vsubl_u8(vb89ABCDEFc1, vb_zero_point)); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane() local
143 vacc0x89AB = vmlal_lane_s16(vacc0x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
144 … vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
145 vacc1x89AB = vmlal_lane_s16(vacc1x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa1), 1); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
146 … vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa1), 1); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
147 vacc2x89AB = vmlal_lane_s16(vacc2x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa2), 1); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
148 … vacc2xCDEF = vmlal_lane_s16(vacc2xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa2), 1); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
149 vacc3x89AB = vmlal_lane_s16(vacc3x89AB, vget_low_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
150 … vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa3), 1); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane()
324 … const int16x8_t vxb89ABCDEFc1 = vreinterpretq_s16_u16(vsubl_u8(vb89ABCDEFc1, vb_zero_point)); in xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane() local
[all …]

1234