Home
last modified time | relevance | path

Searched refs:vprod1x5 (Results 1 – 25 of 36) sorted by relevance

12

/external/XNNPACK/src/qs8-gemm/gen/
D2x8c8-minmax-neon-mlal-padal.c121 int16x8_t vprod1x5 = vmull_s8(vb5x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
123 vprod1x5 = vmlal_s8(vprod1x5, vb5x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
125 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
176 const int16x8_t vprod1x5 = vmull_s8(vb5, va1); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
178 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
D3x8c8-minmax-neon-mlal-padal.c152 int16x8_t vprod1x5 = vmull_s8(vb5x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
155 vprod1x5 = vmlal_s8(vprod1x5, vb5x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
158 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
227 const int16x8_t vprod1x5 = vmull_s8(vb5, va1); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
230 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
D2x8c16-minmax-neon-mlal-padal.c113 int16x8_t vprod1x5 = vmull_s8(vget_low_s8(vb5), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal() local
115 vprod1x5 = vmlal_s8(vprod1x5, vget_high_s8(vb5), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
117 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
D3x8c16-minmax-neon-mlal-padal.c143 int16x8_t vprod1x5 = vmull_s8(vget_low_s8(vb5), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
146 vprod1x5 = vmlal_s8(vprod1x5, vget_high_s8(vb5), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
149 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
D4x8c8-minmax-neon-mlal-padal.c183 int16x8_t vprod1x5 = vmull_s8(vb5x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
187 vprod1x5 = vmlal_s8(vprod1x5, vb5x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
191 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
278 const int16x8_t vprod1x5 = vmull_s8(vb5, va1); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
282 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
D2x16c8-minmax-neon-mlal-padal.c145 int16x8_t vprod1x5 = vmull_s8(vb5x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
147 vprod1x5 = vmlal_s8(vprod1x5, vb5x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
149 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
256 const int16x8_t vprod1x5 = vmull_s8(vb5, va1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
258 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
D2x16c16-minmax-neon-mlal-padal.c137 int16x8_t vprod1x5 = vmull_s8(vget_low_s8(vb5), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() local
139 vprod1x5 = vmlal_s8(vprod1x5, vget_high_s8(vb5), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
141 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
D4x8c16-minmax-neon-mlal-padal.c173 int16x8_t vprod1x5 = vmull_s8(vget_low_s8(vb5), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
177 vprod1x5 = vmlal_s8(vprod1x5, vget_high_s8(vb5), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
181 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
D2x8c8-minmax-neon-mull-padal.c101 const int16x8_t vprod1x5 = vmull_s8(vb5, va1); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal() local
103 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
D3x16c8-minmax-neon-mlal-padal.c184 int16x8_t vprod1x5 = vmull_s8(vb5x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
187 vprod1x5 = vmlal_s8(vprod1x5, vb5x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
190 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
339 const int16x8_t vprod1x5 = vmull_s8(vb5, va1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
342 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
D3x16c16-minmax-neon-mlal-padal.c175 int16x8_t vprod1x5 = vmull_s8(vget_low_s8(vb5), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
178 vprod1x5 = vmlal_s8(vprod1x5, vget_high_s8(vb5), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
181 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
D4x16c8-minmax-neon-mlal-padal.c223 int16x8_t vprod1x5 = vmull_s8(vb5x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
227 vprod1x5 = vmlal_s8(vprod1x5, vb5x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
231 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
422 const int16x8_t vprod1x5 = vmull_s8(vb5, va1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
426 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
/external/XNNPACK/src/qs8-igemm/gen/
D2x8c8-minmax-neon-mlal-padal.c134 int16x8_t vprod1x5 = vmull_s8(vb5x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
136 vprod1x5 = vmlal_s8(vprod1x5, vb5x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
138 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
189 const int16x8_t vprod1x5 = vmull_s8(vb5, va1); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
191 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
D2x8c16-minmax-neon-mlal-padal.c126 int16x8_t vprod1x5 = vmull_s8(vget_low_s8(vb5), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal() local
128 vprod1x5 = vmlal_s8(vprod1x5, vget_high_s8(vb5), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
130 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
D3x8c8-minmax-neon-mlal-padal.c167 int16x8_t vprod1x5 = vmull_s8(vb5x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
170 vprod1x5 = vmlal_s8(vprod1x5, vb5x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
173 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
242 const int16x8_t vprod1x5 = vmull_s8(vb5, va1); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
245 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
D3x8c16-minmax-neon-mlal-padal.c158 int16x8_t vprod1x5 = vmull_s8(vget_low_s8(vb5), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
161 vprod1x5 = vmlal_s8(vprod1x5, vget_high_s8(vb5), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
164 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
D2x16c8-minmax-neon-mlal-padal.c158 int16x8_t vprod1x5 = vmull_s8(vb5x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
160 vprod1x5 = vmlal_s8(vprod1x5, vb5x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
162 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
269 const int16x8_t vprod1x5 = vmull_s8(vb5, va1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
271 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
D4x8c8-minmax-neon-mlal-padal.c200 int16x8_t vprod1x5 = vmull_s8(vb5x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
204 vprod1x5 = vmlal_s8(vprod1x5, vb5x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
208 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
295 const int16x8_t vprod1x5 = vmull_s8(vb5, va1); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
299 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
D4x8c16-minmax-neon-mlal-padal.c190 int16x8_t vprod1x5 = vmull_s8(vget_low_s8(vb5), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
194 vprod1x5 = vmlal_s8(vprod1x5, vget_high_s8(vb5), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
198 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
D2x16c16-minmax-neon-mlal-padal.c150 int16x8_t vprod1x5 = vmull_s8(vget_low_s8(vb5), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() local
152 vprod1x5 = vmlal_s8(vprod1x5, vget_high_s8(vb5), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
154 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
D2x8c8-minmax-neon-mull-padal.c114 const int16x8_t vprod1x5 = vmull_s8(vb5, va1); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal() local
116 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal()
D3x16c8-minmax-neon-mlal-padal.c199 int16x8_t vprod1x5 = vmull_s8(vb5x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
202 vprod1x5 = vmlal_s8(vprod1x5, vb5x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
205 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
354 const int16x8_t vprod1x5 = vmull_s8(vb5, va1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
357 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
D3x16c16-minmax-neon-mlal-padal.c190 int16x8_t vprod1x5 = vmull_s8(vget_low_s8(vb5), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
193 vprod1x5 = vmlal_s8(vprod1x5, vget_high_s8(vb5), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
196 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
D4x16c8-minmax-neon-mlal-padal.c240 int16x8_t vprod1x5 = vmull_s8(vb5x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
244 vprod1x5 = vmlal_s8(vprod1x5, vb5x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
248 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
439 const int16x8_t vprod1x5 = vmull_s8(vb5, va1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
443 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
D3x8c8-minmax-neon-mull-padal.c141 const int16x8_t vprod1x5 = vmull_s8(vb5, va1); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal() local
144 vacc1x5 = vpadalq_s16(vacc1x5, vprod1x5); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()

12