
Searched refs:vprod1x4 (Results 1 – 25 of 36) sorted by relevance
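Every match on this page follows the same NEON widening multiply-accumulate pattern: vmull_s8 widens two int8x8_t vectors into an int16x8_t product, the mlal kernels fold in a second widened product with vmlal_s8, and vpadalq_s16 pairwise-accumulates the int16 lanes into the int32x4_t accumulator vacc1x4. As a rough standalone sketch of that pattern (not code from XNNPACK; the helper and parameter names below are hypothetical, and it assumes <arm_neon.h> on a NEON-capable target):

#include <arm_neon.h>

/* Hypothetical helper, not from XNNPACK: one step of the mull -> mlal -> padal pattern. */
static inline int32x4_t accumulate_widened_products(int32x4_t vacc,
                                                    int8x8_t va_x0, int8x8_t va_x1,
                                                    int8x8_t vb_x0, int8x8_t vb_x1) {
  int16x8_t vprod = vmull_s8(vb_x0, va_x0);  /* widen 8 int8*int8 products to int16         */
  vprod = vmlal_s8(vprod, vb_x1, va_x1);     /* accumulate a second set of widened products */
  return vpadalq_s16(vacc, vprod);           /* pairwise-add int16 pairs into 4 int32 lanes */
}

The *-mull-padal entries in the results skip the vmlal_s8 step and pass a single vmull_s8 product straight to vpadalq_s16, and the c16 kernels obtain their int8x8_t halves with vget_low_s8/vget_high_s8.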

/external/XNNPACK/src/qs8-gemm/gen/
2x8c8-minmax-neon-mlal-padal.c
114 int16x8_t vprod1x4 = vmull_s8(vb4x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
116 vprod1x4 = vmlal_s8(vprod1x4, vb4x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
118 vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
171 const int16x8_t vprod1x4 = vmull_s8(vb4, va1); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
173 vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
3x8c8-minmax-neon-mlal-padal.c
142 int16x8_t vprod1x4 = vmull_s8(vb4x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
145 vprod1x4 = vmlal_s8(vprod1x4, vb4x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
148 vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
220 const int16x8_t vprod1x4 = vmull_s8(vb4, va1); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
223 vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
2x8c16-minmax-neon-mlal-padal.c
107 int16x8_t vprod1x4 = vmull_s8(vget_low_s8(vb4), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal() local
109 vprod1x4 = vmlal_s8(vprod1x4, vget_high_s8(vb4), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
111 vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
3x8c16-minmax-neon-mlal-padal.c
134 int16x8_t vprod1x4 = vmull_s8(vget_low_s8(vb4), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
137 vprod1x4 = vmlal_s8(vprod1x4, vget_high_s8(vb4), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
140 vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
4x8c8-minmax-neon-mlal-padal.c
170 int16x8_t vprod1x4 = vmull_s8(vb4x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
174 vprod1x4 = vmlal_s8(vprod1x4, vb4x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
178 vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
269 const int16x8_t vprod1x4 = vmull_s8(vb4, va1); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
273 vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
2x16c8-minmax-neon-mlal-padal.c
138 int16x8_t vprod1x4 = vmull_s8(vb4x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
140 vprod1x4 = vmlal_s8(vprod1x4, vb4x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
142 vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
251 const int16x8_t vprod1x4 = vmull_s8(vb4, va1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
253 vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
2x16c16-minmax-neon-mlal-padal.c
131 int16x8_t vprod1x4 = vmull_s8(vget_low_s8(vb4), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() local
133 vprod1x4 = vmlal_s8(vprod1x4, vget_high_s8(vb4), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
135 vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
4x8c16-minmax-neon-mlal-padal.c
161 int16x8_t vprod1x4 = vmull_s8(vget_low_s8(vb4), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
165 vprod1x4 = vmlal_s8(vprod1x4, vget_high_s8(vb4), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
169 vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
2x8c8-minmax-neon-mull-padal.c
96 const int16x8_t vprod1x4 = vmull_s8(vb4, va1); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal() local
98 vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
3x16c8-minmax-neon-mlal-padal.c
174 int16x8_t vprod1x4 = vmull_s8(vb4x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
177 vprod1x4 = vmlal_s8(vprod1x4, vb4x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
180 vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
332 const int16x8_t vprod1x4 = vmull_s8(vb4, va1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
335 vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
3x16c16-minmax-neon-mlal-padal.c
166 int16x8_t vprod1x4 = vmull_s8(vget_low_s8(vb4), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
169 vprod1x4 = vmlal_s8(vprod1x4, vget_high_s8(vb4), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
172 vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
4x16c8-minmax-neon-mlal-padal.c
210 int16x8_t vprod1x4 = vmull_s8(vb4x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
214 vprod1x4 = vmlal_s8(vprod1x4, vb4x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
218 vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
413 const int16x8_t vprod1x4 = vmull_s8(vb4, va1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
417 vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
/external/XNNPACK/src/qs8-igemm/gen/
2x8c8-minmax-neon-mlal-padal.c
127 int16x8_t vprod1x4 = vmull_s8(vb4x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
129 vprod1x4 = vmlal_s8(vprod1x4, vb4x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
131 vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
184 const int16x8_t vprod1x4 = vmull_s8(vb4, va1); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
186 vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
2x8c16-minmax-neon-mlal-padal.c
120 int16x8_t vprod1x4 = vmull_s8(vget_low_s8(vb4), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal() local
122 vprod1x4 = vmlal_s8(vprod1x4, vget_high_s8(vb4), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
124 vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
3x8c8-minmax-neon-mlal-padal.c
157 int16x8_t vprod1x4 = vmull_s8(vb4x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
160 vprod1x4 = vmlal_s8(vprod1x4, vb4x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
163 vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
235 const int16x8_t vprod1x4 = vmull_s8(vb4, va1); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
238 vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
3x8c16-minmax-neon-mlal-padal.c
149 int16x8_t vprod1x4 = vmull_s8(vget_low_s8(vb4), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
152 vprod1x4 = vmlal_s8(vprod1x4, vget_high_s8(vb4), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
155 vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
2x16c8-minmax-neon-mlal-padal.c
151 int16x8_t vprod1x4 = vmull_s8(vb4x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
153 vprod1x4 = vmlal_s8(vprod1x4, vb4x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
155 vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
264 const int16x8_t vprod1x4 = vmull_s8(vb4, va1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
266 vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
4x8c8-minmax-neon-mlal-padal.c
187 int16x8_t vprod1x4 = vmull_s8(vb4x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
191 vprod1x4 = vmlal_s8(vprod1x4, vb4x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
195 vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
286 const int16x8_t vprod1x4 = vmull_s8(vb4, va1); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
290 vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
4x8c16-minmax-neon-mlal-padal.c
178 int16x8_t vprod1x4 = vmull_s8(vget_low_s8(vb4), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
182 vprod1x4 = vmlal_s8(vprod1x4, vget_high_s8(vb4), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
186 vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
2x16c16-minmax-neon-mlal-padal.c
144 int16x8_t vprod1x4 = vmull_s8(vget_low_s8(vb4), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() local
146 vprod1x4 = vmlal_s8(vprod1x4, vget_high_s8(vb4), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
148 vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
2x8c8-minmax-neon-mull-padal.c
109 const int16x8_t vprod1x4 = vmull_s8(vb4, va1); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal() local
111 vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal()
3x16c8-minmax-neon-mlal-padal.c
189 int16x8_t vprod1x4 = vmull_s8(vb4x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
192 vprod1x4 = vmlal_s8(vprod1x4, vb4x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
195 vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
347 const int16x8_t vprod1x4 = vmull_s8(vb4, va1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
350 vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
3x16c16-minmax-neon-mlal-padal.c
181 int16x8_t vprod1x4 = vmull_s8(vget_low_s8(vb4), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
184 vprod1x4 = vmlal_s8(vprod1x4, vget_high_s8(vb4), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
187 vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
4x16c8-minmax-neon-mlal-padal.c
227 int16x8_t vprod1x4 = vmull_s8(vb4x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
231 vprod1x4 = vmlal_s8(vprod1x4, vb4x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
235 vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
430 const int16x8_t vprod1x4 = vmull_s8(vb4, va1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
434 vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
3x8c8-minmax-neon-mull-padal.c
134 const int16x8_t vprod1x4 = vmull_s8(vb4, va1); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal() local
137 vacc1x4 = vpadalq_s16(vacc1x4, vprod1x4); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
