Home
last modified time | relevance | path

Searched refs:vprod2x3 (Results 1 – 24 of 24) sorted by relevance

/external/XNNPACK/src/qs8-gemm/gen/
D3x8c8-minmax-rndnu-neon-mlal.c133 int16x8_t vprod2x3 = vmull_s8(vb3x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal() local
136 vprod2x3 = vmlal_s8(vprod2x3, vb3x1, va2x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal()
139 vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal()
214 const int16x8_t vprod2x3 = vmull_s8(vb3, va2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal() local
217 vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal()
D4x8c8-minmax-rndnu-neon-mlal.c158 int16x8_t vprod2x3 = vmull_s8(vb3x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal() local
162 vprod2x3 = vmlal_s8(vprod2x3, vb3x1, va2x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal()
166 vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal()
261 const int16x8_t vprod2x3 = vmull_s8(vb3, va2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal() local
265 vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal()
D3x8c16-minmax-rndnu-neon-mlal.c126 int16x8_t vprod2x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va2)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c16__neon_mlal() local
129 vprod2x3 = vmlal_s8(vprod2x3, vget_high_s8(vb3), vget_high_s8(va2)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c16__neon_mlal()
132 vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c16__neon_mlal()
D4x8c16-minmax-rndnu-neon-mlal.c150 int16x8_t vprod2x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va2)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c16__neon_mlal() local
154 vprod2x3 = vmlal_s8(vprod2x3, vget_high_s8(vb3), vget_high_s8(va2)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c16__neon_mlal()
158 vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c16__neon_mlal()
D3x16c8-minmax-rndnu-neon-mlal.c165 int16x8_t vprod2x3 = vmull_s8(vb3x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() local
168 vprod2x3 = vmlal_s8(vprod2x3, vb3x1, va2x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
171 vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
326 const int16x8_t vprod2x3 = vmull_s8(vb3, va2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() local
329 vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
D3x16c16-minmax-rndnu-neon-mlal.c158 int16x8_t vprod2x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va2)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() local
161 vprod2x3 = vmlal_s8(vprod2x3, vget_high_s8(vb3), vget_high_s8(va2)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
164 vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
D3x8c8-minmax-rndnu-neon-mull.c113 const int16x8_t vprod2x3 = vmull_s8(vb3, va2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mull() local
116 vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mull()
D4x16c8-minmax-rndnu-neon-mlal.c198 int16x8_t vprod2x3 = vmull_s8(vb3x0, va2x0); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() local
202 vprod2x3 = vmlal_s8(vprod2x3, vb3x1, va2x1); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
206 vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
405 const int16x8_t vprod2x3 = vmull_s8(vb3, va2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() local
409 vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
D4x16c16-minmax-rndnu-neon-mlal.c190 int16x8_t vprod2x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va2)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() local
194 vprod2x3 = vmlal_s8(vprod2x3, vget_high_s8(vb3), vget_high_s8(va2)); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
198 vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
D4x8c8-minmax-rndnu-neon-mull.c134 const int16x8_t vprod2x3 = vmull_s8(vb3, va2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull() local
138 vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull()
D3x16c8-minmax-rndnu-neon-mull.c137 const int16x8_t vprod2x3 = vmull_s8(vb3, va2); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull() local
140 vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull()
D4x16c8-minmax-rndnu-neon-mull.c166 const int16x8_t vprod2x3 = vmull_s8(vb3, va2); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull() local
170 vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3); in xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull()
/external/XNNPACK/src/qs8-igemm/gen/
D3x8c8-minmax-rndnu-neon-mlal.c148 int16x8_t vprod2x3 = vmull_s8(vb3x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal() local
151 vprod2x3 = vmlal_s8(vprod2x3, vb3x1, va2x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal()
154 vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal()
229 const int16x8_t vprod2x3 = vmull_s8(vb3, va2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal() local
232 vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal()
D4x8c8-minmax-rndnu-neon-mlal.c175 int16x8_t vprod2x3 = vmull_s8(vb3x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal() local
179 vprod2x3 = vmlal_s8(vprod2x3, vb3x1, va2x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal()
183 vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal()
278 const int16x8_t vprod2x3 = vmull_s8(vb3, va2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal() local
282 vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal()
D3x8c16-minmax-rndnu-neon-mlal.c141 int16x8_t vprod2x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va2)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal() local
144 vprod2x3 = vmlal_s8(vprod2x3, vget_high_s8(vb3), vget_high_s8(va2)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal()
147 vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal()
D4x8c16-minmax-rndnu-neon-mlal.c167 int16x8_t vprod2x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va2)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal() local
171 vprod2x3 = vmlal_s8(vprod2x3, vget_high_s8(vb3), vget_high_s8(va2)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal()
175 vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal()
D3x16c8-minmax-rndnu-neon-mlal.c180 int16x8_t vprod2x3 = vmull_s8(vb3x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() local
183 vprod2x3 = vmlal_s8(vprod2x3, vb3x1, va2x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
186 vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
341 const int16x8_t vprod2x3 = vmull_s8(vb3, va2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal() local
344 vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal()
D3x16c16-minmax-rndnu-neon-mlal.c173 int16x8_t vprod2x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va2)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal() local
176 vprod2x3 = vmlal_s8(vprod2x3, vget_high_s8(vb3), vget_high_s8(va2)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
179 vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal()
D4x16c8-minmax-rndnu-neon-mlal.c215 int16x8_t vprod2x3 = vmull_s8(vb3x0, va2x0); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() local
219 vprod2x3 = vmlal_s8(vprod2x3, vb3x1, va2x1); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
223 vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
422 const int16x8_t vprod2x3 = vmull_s8(vb3, va2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal() local
426 vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal()
D3x8c8-minmax-rndnu-neon-mull.c128 const int16x8_t vprod2x3 = vmull_s8(vb3, va2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull() local
131 vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull()
D4x16c16-minmax-rndnu-neon-mlal.c207 int16x8_t vprod2x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va2)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal() local
211 vprod2x3 = vmlal_s8(vprod2x3, vget_high_s8(vb3), vget_high_s8(va2)); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
215 vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal()
D4x8c8-minmax-rndnu-neon-mull.c151 const int16x8_t vprod2x3 = vmull_s8(vb3, va2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull() local
155 vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull()
D3x16c8-minmax-rndnu-neon-mull.c152 const int16x8_t vprod2x3 = vmull_s8(vb3, va2); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull() local
155 vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull()
D4x16c8-minmax-rndnu-neon-mull.c183 const int16x8_t vprod2x3 = vmull_s8(vb3, va2); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull() local
187 vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3); in xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull()