Lines Matching refs:vreinterpret_s8_s16
108 …int16x8_t vprod0x0123c0 = vmull_s8(vb0123c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
109 …int16x8_t vprod1x0123c0 = vmull_s8(vb0123c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
110 …int16x8_t vprod2x0123c0 = vmull_s8(vb0123c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
111 …int16x8_t vprod3x0123c0 = vmull_s8(vb0123c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
113 …vprod0x0123c0 = vmlal_s8(vprod0x0123c0, vb0123c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
114 …vprod1x0123c0 = vmlal_s8(vprod1x0123c0, vb0123c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
115 …vprod2x0123c0 = vmlal_s8(vprod2x0123c0, vb0123c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
116 …vprod3x0123c0 = vmlal_s8(vprod3x0123c0, vb0123c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
121 …int16x8_t vprod0x4567c0 = vmull_s8(vb4567c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
122 …int16x8_t vprod1x4567c0 = vmull_s8(vb4567c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
123 …int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
124 …int16x8_t vprod3x4567c0 = vmull_s8(vb4567c0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
126 …vprod0x4567c0 = vmlal_s8(vprod0x4567c0, vb4567c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
127 …vprod1x4567c0 = vmlal_s8(vprod1x4567c0, vb4567c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
128 …vprod2x4567c0 = vmlal_s8(vprod2x4567c0, vb4567c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
129 …vprod3x4567c0 = vmlal_s8(vprod3x4567c0, vb4567c0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
134 …int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
135 …int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
136 …int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
137 …int16x8_t vprod3x89ABc0 = vmull_s8(vb89ABc0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
139 …vprod0x89ABc0 = vmlal_s8(vprod0x89ABc0, vb89ABc0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
140 …vprod1x89ABc0 = vmlal_s8(vprod1x89ABc0, vb89ABc0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
141 …vprod2x89ABc0 = vmlal_s8(vprod2x89ABc0, vb89ABc0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
142 …vprod3x89ABc0 = vmlal_s8(vprod3x89ABc0, vb89ABc0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
147 …int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
148 …int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
149 …int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
150 …int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
152 …vprod0xCDEFc0 = vmlal_s8(vprod0xCDEFc0, vbCDEFc0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
153 …vprod1xCDEFc0 = vmlal_s8(vprod1xCDEFc0, vbCDEFc0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
154 …vprod2xCDEFc0 = vmlal_s8(vprod2xCDEFc0, vbCDEFc0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
155 …vprod3xCDEFc0 = vmlal_s8(vprod3xCDEFc0, vbCDEFc0x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
160 …int16x8_t vprod0x0123c1 = vmull_s8(vb0123c1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
161 …int16x8_t vprod1x0123c1 = vmull_s8(vb0123c1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
162 …int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
163 …int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
165 …vprod0x0123c1 = vmlal_s8(vprod0x0123c1, vb0123c1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
166 …vprod1x0123c1 = vmlal_s8(vprod1x0123c1, vb0123c1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
167 …vprod2x0123c1 = vmlal_s8(vprod2x0123c1, vb0123c1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
168 …vprod3x0123c1 = vmlal_s8(vprod3x0123c1, vb0123c1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
173 …int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
174 …int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
175 …int16x8_t vprod2x4567c1 = vmull_s8(vb4567c1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
176 …int16x8_t vprod3x4567c1 = vmull_s8(vb4567c1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
178 …vprod0x4567c1 = vmlal_s8(vprod0x4567c1, vb4567c1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
179 …vprod1x4567c1 = vmlal_s8(vprod1x4567c1, vb4567c1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
180 …vprod2x4567c1 = vmlal_s8(vprod2x4567c1, vb4567c1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
181 …vprod3x4567c1 = vmlal_s8(vprod3x4567c1, vb4567c1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
186 …int16x8_t vprod0x89ABc1 = vmull_s8(vb89ABc1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
187 …int16x8_t vprod1x89ABc1 = vmull_s8(vb89ABc1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
188 …int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
189 …int16x8_t vprod3x89ABc1 = vmull_s8(vb89ABc1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
191 …vprod0x89ABc1 = vmlal_s8(vprod0x89ABc1, vb89ABc1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
192 …vprod1x89ABc1 = vmlal_s8(vprod1x89ABc1, vb89ABc1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
193 …vprod2x89ABc1 = vmlal_s8(vprod2x89ABc1, vb89ABc1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
194 …vprod3x89ABc1 = vmlal_s8(vprod3x89ABc1, vb89ABc1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
199 …int16x8_t vprod0xCDEFc1 = vmull_s8(vbCDEFc1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
200 …int16x8_t vprod1xCDEFc1 = vmull_s8(vbCDEFc1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
201 …int16x8_t vprod2xCDEFc1 = vmull_s8(vbCDEFc1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
202 …int16x8_t vprod3xCDEFc1 = vmull_s8(vbCDEFc1x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
204 …vprod0xCDEFc1 = vmlal_s8(vprod0xCDEFc1, vbCDEFc1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
205 …vprod1xCDEFc1 = vmlal_s8(vprod1xCDEFc1, vbCDEFc1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
206 …vprod2xCDEFc1 = vmlal_s8(vprod2xCDEFc1, vbCDEFc1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
207 …vprod3xCDEFc1 = vmlal_s8(vprod3xCDEFc1, vbCDEFc1x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
212 …int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
213 …int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
214 …int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
215 …int16x8_t vprod3x0123c2 = vmull_s8(vb0123c2x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
217 …vprod0x0123c2 = vmlal_s8(vprod0x0123c2, vb0123c2x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
218 …vprod1x0123c2 = vmlal_s8(vprod1x0123c2, vb0123c2x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
219 …vprod2x0123c2 = vmlal_s8(vprod2x0123c2, vb0123c2x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
220 …vprod3x0123c2 = vmlal_s8(vprod3x0123c2, vb0123c2x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
225 …int16x8_t vprod0x4567c2 = vmull_s8(vb4567c2x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
226 …int16x8_t vprod1x4567c2 = vmull_s8(vb4567c2x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
227 …int16x8_t vprod2x4567c2 = vmull_s8(vb4567c2x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
228 …int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
230 …vprod0x4567c2 = vmlal_s8(vprod0x4567c2, vb4567c2x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
231 …vprod1x4567c2 = vmlal_s8(vprod1x4567c2, vb4567c2x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
232 …vprod2x4567c2 = vmlal_s8(vprod2x4567c2, vb4567c2x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
233 …vprod3x4567c2 = vmlal_s8(vprod3x4567c2, vb4567c2x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
238 …int16x8_t vprod0x89ABc2 = vmull_s8(vb89ABc2x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
239 …int16x8_t vprod1x89ABc2 = vmull_s8(vb89ABc2x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
240 …int16x8_t vprod2x89ABc2 = vmull_s8(vb89ABc2x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
241 …int16x8_t vprod3x89ABc2 = vmull_s8(vb89ABc2x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
243 …vprod0x89ABc2 = vmlal_s8(vprod0x89ABc2, vb89ABc2x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
244 …vprod1x89ABc2 = vmlal_s8(vprod1x89ABc2, vb89ABc2x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
245 …vprod2x89ABc2 = vmlal_s8(vprod2x89ABc2, vb89ABc2x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
246 …vprod3x89ABc2 = vmlal_s8(vprod3x89ABc2, vb89ABc2x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
251 …int16x8_t vprod0xCDEFc2 = vmull_s8(vbCDEFc2x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
252 …int16x8_t vprod1xCDEFc2 = vmull_s8(vbCDEFc2x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
253 …int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
254 …int16x8_t vprod3xCDEFc2 = vmull_s8(vbCDEFc2x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
256 …vprod0xCDEFc2 = vmlal_s8(vprod0xCDEFc2, vbCDEFc2x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
257 …vprod1xCDEFc2 = vmlal_s8(vprod1xCDEFc2, vbCDEFc2x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
258 …vprod2xCDEFc2 = vmlal_s8(vprod2xCDEFc2, vbCDEFc2x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
259 …vprod3xCDEFc2 = vmlal_s8(vprod3xCDEFc2, vbCDEFc2x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
264 …int16x8_t vprod0x0123c3 = vmull_s8(vb0123c3x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
265 …int16x8_t vprod1x0123c3 = vmull_s8(vb0123c3x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
266 …int16x8_t vprod2x0123c3 = vmull_s8(vb0123c3x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
267 …int16x8_t vprod3x0123c3 = vmull_s8(vb0123c3x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
269 …vprod0x0123c3 = vmlal_s8(vprod0x0123c3, vb0123c3x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
270 …vprod1x0123c3 = vmlal_s8(vprod1x0123c3, vb0123c3x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
271 …vprod2x0123c3 = vmlal_s8(vprod2x0123c3, vb0123c3x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
272 …vprod3x0123c3 = vmlal_s8(vprod3x0123c3, vb0123c3x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
277 …int16x8_t vprod0x4567c3 = vmull_s8(vb4567c3x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
278 …int16x8_t vprod1x4567c3 = vmull_s8(vb4567c3x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
279 …int16x8_t vprod2x4567c3 = vmull_s8(vb4567c3x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
280 …int16x8_t vprod3x4567c3 = vmull_s8(vb4567c3x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
282 …vprod0x4567c3 = vmlal_s8(vprod0x4567c3, vb4567c3x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
283 …vprod1x4567c3 = vmlal_s8(vprod1x4567c3, vb4567c3x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
284 …vprod2x4567c3 = vmlal_s8(vprod2x4567c3, vb4567c3x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
285 …vprod3x4567c3 = vmlal_s8(vprod3x4567c3, vb4567c3x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
290 …int16x8_t vprod0x89ABc3 = vmull_s8(vb89ABc3x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
291 …int16x8_t vprod1x89ABc3 = vmull_s8(vb89ABc3x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
292 …int16x8_t vprod2x89ABc3 = vmull_s8(vb89ABc3x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
293 …int16x8_t vprod3x89ABc3 = vmull_s8(vb89ABc3x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
295 …vprod0x89ABc3 = vmlal_s8(vprod0x89ABc3, vb89ABc3x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
296 …vprod1x89ABc3 = vmlal_s8(vprod1x89ABc3, vb89ABc3x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
297 …vprod2x89ABc3 = vmlal_s8(vprod2x89ABc3, vb89ABc3x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
298 …vprod3x89ABc3 = vmlal_s8(vprod3x89ABc3, vb89ABc3x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
303 …int16x8_t vprod0xCDEFc3 = vmull_s8(vbCDEFc3x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
304 …int16x8_t vprod1xCDEFc3 = vmull_s8(vbCDEFc3x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
305 …int16x8_t vprod2xCDEFc3 = vmull_s8(vbCDEFc3x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
306 …int16x8_t vprod3xCDEFc3 = vmull_s8(vbCDEFc3x0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
308 …vprod0xCDEFc3 = vmlal_s8(vprod0xCDEFc3, vbCDEFc3x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
309 …vprod1xCDEFc3 = vmlal_s8(vprod1xCDEFc3, vbCDEFc3x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
310 …vprod2xCDEFc3 = vmlal_s8(vprod2xCDEFc3, vbCDEFc3x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
311 …vprod3xCDEFc3 = vmlal_s8(vprod3xCDEFc3, vbCDEFc3x1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
343 …const int16x8_t vprod0x0123c0 = vmull_s8(vb0123c0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
344 …const int16x8_t vprod0x0123c1 = vmull_s8(vb0123c1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
345 …const int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
346 …const int16x8_t vprod0x0123c3 = vmull_s8(vb0123c3, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
351 …const int16x8_t vprod0x4567c0 = vmull_s8(vb4567c0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
352 …const int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
353 …const int16x8_t vprod0x4567c2 = vmull_s8(vb4567c2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
354 …const int16x8_t vprod0x4567c3 = vmull_s8(vb4567c3, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
359 …const int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
360 …const int16x8_t vprod0x89ABc1 = vmull_s8(vb89ABc1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
361 …const int16x8_t vprod0x89ABc2 = vmull_s8(vb89ABc2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
362 …const int16x8_t vprod0x89ABc3 = vmull_s8(vb89ABc3, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
367 …const int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
368 …const int16x8_t vprod0xCDEFc1 = vmull_s8(vbCDEFc1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
369 …const int16x8_t vprod0xCDEFc2 = vmull_s8(vbCDEFc2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
370 …const int16x8_t vprod0xCDEFc3 = vmull_s8(vbCDEFc3, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
375 …const int16x8_t vprod1x0123c0 = vmull_s8(vb0123c0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
376 …const int16x8_t vprod1x0123c1 = vmull_s8(vb0123c1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
377 …const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
378 …const int16x8_t vprod1x0123c3 = vmull_s8(vb0123c3, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
383 …const int16x8_t vprod1x4567c0 = vmull_s8(vb4567c0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
384 …const int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
385 …const int16x8_t vprod1x4567c2 = vmull_s8(vb4567c2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
386 …const int16x8_t vprod1x4567c3 = vmull_s8(vb4567c3, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
391 …const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
392 …const int16x8_t vprod1x89ABc1 = vmull_s8(vb89ABc1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
393 …const int16x8_t vprod1x89ABc2 = vmull_s8(vb89ABc2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
394 …const int16x8_t vprod1x89ABc3 = vmull_s8(vb89ABc3, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
399 …const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
400 …const int16x8_t vprod1xCDEFc1 = vmull_s8(vbCDEFc1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
401 …const int16x8_t vprod1xCDEFc2 = vmull_s8(vbCDEFc2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
402 …const int16x8_t vprod1xCDEFc3 = vmull_s8(vbCDEFc3, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
407 …const int16x8_t vprod2x0123c0 = vmull_s8(vb0123c0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
408 …const int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
409 …const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
410 …const int16x8_t vprod2x0123c3 = vmull_s8(vb0123c3, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
415 …const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
416 …const int16x8_t vprod2x4567c1 = vmull_s8(vb4567c1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
417 …const int16x8_t vprod2x4567c2 = vmull_s8(vb4567c2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
418 …const int16x8_t vprod2x4567c3 = vmull_s8(vb4567c3, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
423 …const int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
424 …const int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
425 …const int16x8_t vprod2x89ABc2 = vmull_s8(vb89ABc2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
426 …const int16x8_t vprod2x89ABc3 = vmull_s8(vb89ABc3, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
431 …const int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
432 …const int16x8_t vprod2xCDEFc1 = vmull_s8(vbCDEFc1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
433 …const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
434 …const int16x8_t vprod2xCDEFc3 = vmull_s8(vbCDEFc3, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
439 …const int16x8_t vprod3x0123c0 = vmull_s8(vb0123c0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
440 …const int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
441 …const int16x8_t vprod3x0123c2 = vmull_s8(vb0123c2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
442 …const int16x8_t vprod3x0123c3 = vmull_s8(vb0123c3, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
447 …const int16x8_t vprod3x4567c0 = vmull_s8(vb4567c0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
448 …const int16x8_t vprod3x4567c1 = vmull_s8(vb4567c1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
449 …const int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
450 …const int16x8_t vprod3x4567c3 = vmull_s8(vb4567c3, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
455 …const int16x8_t vprod3x89ABc0 = vmull_s8(vb89ABc0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
456 …const int16x8_t vprod3x89ABc1 = vmull_s8(vb89ABc1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
457 …const int16x8_t vprod3x89ABc2 = vmull_s8(vb89ABc2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
458 …const int16x8_t vprod3x89ABc3 = vmull_s8(vb89ABc3, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
463 …const int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
464 …const int16x8_t vprod3xCDEFc1 = vmull_s8(vbCDEFc1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
465 …const int16x8_t vprod3xCDEFc2 = vmull_s8(vbCDEFc2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
466 …const int16x8_t vprod3xCDEFc3 = vmull_s8(vbCDEFc3, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
486 …const int16x8_t vprod0x0123c0 = vmull_s8(vb0123c0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
488 …const int16x8_t vprod0x4567c0 = vmull_s8(vb4567c0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
490 …const int16x8_t vprod0x89ABc0 = vmull_s8(vb89ABc0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
492 …const int16x8_t vprod0xCDEFc0 = vmull_s8(vbCDEFc0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
494 …const int16x8_t vprod1x0123c0 = vmull_s8(vb0123c0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
496 …const int16x8_t vprod1x4567c0 = vmull_s8(vb4567c0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
498 …const int16x8_t vprod1x89ABc0 = vmull_s8(vb89ABc0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
500 …const int16x8_t vprod1xCDEFc0 = vmull_s8(vbCDEFc0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
502 …const int16x8_t vprod2x0123c0 = vmull_s8(vb0123c0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
504 …const int16x8_t vprod2x4567c0 = vmull_s8(vb4567c0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
506 …const int16x8_t vprod2x89ABc0 = vmull_s8(vb89ABc0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
508 …const int16x8_t vprod2xCDEFc0 = vmull_s8(vbCDEFc0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
510 …const int16x8_t vprod3x0123c0 = vmull_s8(vb0123c0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
512 …const int16x8_t vprod3x4567c0 = vmull_s8(vb4567c0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
514 …const int16x8_t vprod3x89ABc0 = vmull_s8(vb89ABc0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
516 …const int16x8_t vprod3xCDEFc0 = vmull_s8(vbCDEFc0, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
525 …const int16x8_t vprod0x0123c1 = vmull_s8(vb0123c1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
527 …const int16x8_t vprod0x4567c1 = vmull_s8(vb4567c1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
529 …const int16x8_t vprod0x89ABc1 = vmull_s8(vb89ABc1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
531 …const int16x8_t vprod0xCDEFc1 = vmull_s8(vbCDEFc1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
533 …const int16x8_t vprod1x0123c1 = vmull_s8(vb0123c1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
535 …const int16x8_t vprod1x4567c1 = vmull_s8(vb4567c1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
537 …const int16x8_t vprod1x89ABc1 = vmull_s8(vb89ABc1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
539 …const int16x8_t vprod1xCDEFc1 = vmull_s8(vbCDEFc1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
541 …const int16x8_t vprod2x0123c1 = vmull_s8(vb0123c1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
543 …const int16x8_t vprod2x4567c1 = vmull_s8(vb4567c1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
545 …const int16x8_t vprod2x89ABc1 = vmull_s8(vb89ABc1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
547 …const int16x8_t vprod2xCDEFc1 = vmull_s8(vbCDEFc1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
549 …const int16x8_t vprod3x0123c1 = vmull_s8(vb0123c1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
551 …const int16x8_t vprod3x4567c1 = vmull_s8(vb4567c1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
553 …const int16x8_t vprod3x89ABc1 = vmull_s8(vb89ABc1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
555 …const int16x8_t vprod3xCDEFc1 = vmull_s8(vbCDEFc1, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
564 …const int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
566 …const int16x8_t vprod0x4567c2 = vmull_s8(vb4567c2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
568 …const int16x8_t vprod0x89ABc2 = vmull_s8(vb89ABc2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
570 …const int16x8_t vprod0xCDEFc2 = vmull_s8(vbCDEFc2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
572 …const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
574 …const int16x8_t vprod1x4567c2 = vmull_s8(vb4567c2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
576 …const int16x8_t vprod1x89ABc2 = vmull_s8(vb89ABc2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
578 …const int16x8_t vprod1xCDEFc2 = vmull_s8(vbCDEFc2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
580 …const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
582 …const int16x8_t vprod2x4567c2 = vmull_s8(vb4567c2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
584 …const int16x8_t vprod2x89ABc2 = vmull_s8(vb89ABc2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
586 …const int16x8_t vprod2xCDEFc2 = vmull_s8(vbCDEFc2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
588 …const int16x8_t vprod3x0123c2 = vmull_s8(vb0123c2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
590 …const int16x8_t vprod3x4567c2 = vmull_s8(vb4567c2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
592 …const int16x8_t vprod3x89ABc2 = vmull_s8(vb89ABc2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()
594 …const int16x8_t vprod3xCDEFc2 = vmull_s8(vbCDEFc2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup()