Home
last modified time | relevance | path

Searched refs:vprod1x6 (Results 1 – 25 of 36) sorted by relevance

12

/external/XNNPACK/src/qs8-gemm/gen/
D2x8c8-minmax-neon-mlal-padal.c128 int16x8_t vprod1x6 = vmull_s8(vb6x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
130 vprod1x6 = vmlal_s8(vprod1x6, vb6x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
132 vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
181 const int16x8_t vprod1x6 = vmull_s8(vb6, va1); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
183 vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
D3x8c8-minmax-neon-mlal-padal.c162 int16x8_t vprod1x6 = vmull_s8(vb6x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
165 vprod1x6 = vmlal_s8(vprod1x6, vb6x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
168 vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
234 const int16x8_t vprod1x6 = vmull_s8(vb6, va1); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
237 vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
D2x8c16-minmax-neon-mlal-padal.c119 int16x8_t vprod1x6 = vmull_s8(vget_low_s8(vb6), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal() local
121 vprod1x6 = vmlal_s8(vprod1x6, vget_high_s8(vb6), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
123 vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
D3x8c16-minmax-neon-mlal-padal.c152 int16x8_t vprod1x6 = vmull_s8(vget_low_s8(vb6), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
155 vprod1x6 = vmlal_s8(vprod1x6, vget_high_s8(vb6), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
158 vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
D4x8c8-minmax-neon-mlal-padal.c196 int16x8_t vprod1x6 = vmull_s8(vb6x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
200 vprod1x6 = vmlal_s8(vprod1x6, vb6x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
204 vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
287 const int16x8_t vprod1x6 = vmull_s8(vb6, va1); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
291 vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
D2x16c8-minmax-neon-mlal-padal.c152 int16x8_t vprod1x6 = vmull_s8(vb6x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
154 vprod1x6 = vmlal_s8(vprod1x6, vb6x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
156 vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
261 const int16x8_t vprod1x6 = vmull_s8(vb6, va1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
263 vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
D2x16c16-minmax-neon-mlal-padal.c143 int16x8_t vprod1x6 = vmull_s8(vget_low_s8(vb6), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() local
145 vprod1x6 = vmlal_s8(vprod1x6, vget_high_s8(vb6), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
147 vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
D4x8c16-minmax-neon-mlal-padal.c185 int16x8_t vprod1x6 = vmull_s8(vget_low_s8(vb6), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
189 vprod1x6 = vmlal_s8(vprod1x6, vget_high_s8(vb6), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
193 vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
D2x8c8-minmax-neon-mull-padal.c106 const int16x8_t vprod1x6 = vmull_s8(vb6, va1); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal() local
108 vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
D3x16c8-minmax-neon-mlal-padal.c194 int16x8_t vprod1x6 = vmull_s8(vb6x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
197 vprod1x6 = vmlal_s8(vprod1x6, vb6x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
200 vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
346 const int16x8_t vprod1x6 = vmull_s8(vb6, va1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
349 vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
D3x16c16-minmax-neon-mlal-padal.c184 int16x8_t vprod1x6 = vmull_s8(vget_low_s8(vb6), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
187 vprod1x6 = vmlal_s8(vprod1x6, vget_high_s8(vb6), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
190 vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
D4x16c8-minmax-neon-mlal-padal.c236 int16x8_t vprod1x6 = vmull_s8(vb6x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
240 vprod1x6 = vmlal_s8(vprod1x6, vb6x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
244 vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
431 const int16x8_t vprod1x6 = vmull_s8(vb6, va1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
435 vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
/external/XNNPACK/src/qs8-igemm/gen/
D2x8c8-minmax-neon-mlal-padal.c141 int16x8_t vprod1x6 = vmull_s8(vb6x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
143 vprod1x6 = vmlal_s8(vprod1x6, vb6x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
145 vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
194 const int16x8_t vprod1x6 = vmull_s8(vb6, va1); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
196 vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
D2x8c16-minmax-neon-mlal-padal.c132 int16x8_t vprod1x6 = vmull_s8(vget_low_s8(vb6), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal() local
134 vprod1x6 = vmlal_s8(vprod1x6, vget_high_s8(vb6), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
136 vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
D3x8c8-minmax-neon-mlal-padal.c177 int16x8_t vprod1x6 = vmull_s8(vb6x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
180 vprod1x6 = vmlal_s8(vprod1x6, vb6x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
183 vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
249 const int16x8_t vprod1x6 = vmull_s8(vb6, va1); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
252 vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
D3x8c16-minmax-neon-mlal-padal.c167 int16x8_t vprod1x6 = vmull_s8(vget_low_s8(vb6), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
170 vprod1x6 = vmlal_s8(vprod1x6, vget_high_s8(vb6), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
173 vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
D2x16c8-minmax-neon-mlal-padal.c165 int16x8_t vprod1x6 = vmull_s8(vb6x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
167 vprod1x6 = vmlal_s8(vprod1x6, vb6x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
169 vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
274 const int16x8_t vprod1x6 = vmull_s8(vb6, va1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
276 vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
D4x8c8-minmax-neon-mlal-padal.c213 int16x8_t vprod1x6 = vmull_s8(vb6x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
217 vprod1x6 = vmlal_s8(vprod1x6, vb6x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
221 vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
304 const int16x8_t vprod1x6 = vmull_s8(vb6, va1); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
308 vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
D4x8c16-minmax-neon-mlal-padal.c202 int16x8_t vprod1x6 = vmull_s8(vget_low_s8(vb6), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
206 vprod1x6 = vmlal_s8(vprod1x6, vget_high_s8(vb6), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
210 vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
D2x16c16-minmax-neon-mlal-padal.c156 int16x8_t vprod1x6 = vmull_s8(vget_low_s8(vb6), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() local
158 vprod1x6 = vmlal_s8(vprod1x6, vget_high_s8(vb6), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
160 vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
D2x8c8-minmax-neon-mull-padal.c119 const int16x8_t vprod1x6 = vmull_s8(vb6, va1); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal() local
121 vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal()
D3x16c8-minmax-neon-mlal-padal.c209 int16x8_t vprod1x6 = vmull_s8(vb6x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
212 vprod1x6 = vmlal_s8(vprod1x6, vb6x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
215 vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
361 const int16x8_t vprod1x6 = vmull_s8(vb6, va1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
364 vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
D3x16c16-minmax-neon-mlal-padal.c199 int16x8_t vprod1x6 = vmull_s8(vget_low_s8(vb6), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
202 vprod1x6 = vmlal_s8(vprod1x6, vget_high_s8(vb6), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
205 vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
D4x16c8-minmax-neon-mlal-padal.c253 int16x8_t vprod1x6 = vmull_s8(vb6x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
257 vprod1x6 = vmlal_s8(vprod1x6, vb6x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
261 vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
448 const int16x8_t vprod1x6 = vmull_s8(vb6, va1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
452 vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
D3x8c8-minmax-neon-mull-padal.c148 const int16x8_t vprod1x6 = vmull_s8(vb6, va1); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal() local
151 vacc1x6 = vpadalq_s16(vacc1x6, vprod1x6); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()

12