Home
last modified time | relevance | path

Searched refs:vaccSTUV (Results 1 – 25 of 87) sorted by relevance

1234

/external/XNNPACK/src/qs8-dwconv/gen/
Dup32x25-minmax-rndnu-neon-mul16.c176 int32x4_t vaccSTUV = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16() local
195 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi0xOPQRSTUV), vget_high_s16(vk0xOPQRSTUV)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16()
213 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi1xOPQRSTUV), vget_high_s16(vk1xOPQRSTUV)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16()
231 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi2xOPQRSTUV), vget_high_s16(vk2xOPQRSTUV)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16()
249 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi3xOPQRSTUV), vget_high_s16(vk3xOPQRSTUV)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16()
267 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi4xOPQRSTUV), vget_high_s16(vk4xOPQRSTUV)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16()
285 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi5xOPQRSTUV), vget_high_s16(vk5xOPQRSTUV)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16()
303 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi6xOPQRSTUV), vget_high_s16(vk6xOPQRSTUV)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16()
321 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi7xOPQRSTUV), vget_high_s16(vk7xOPQRSTUV)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16()
339 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi8xOPQRSTUV), vget_high_s16(vk8xOPQRSTUV)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16()
[all …]
Dup32x25-minmax-fp32-neon-mul16.c175 int32x4_t vaccSTUV = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16() local
194 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi0xOPQRSTUV), vget_high_s16(vk0xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16()
212 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi1xOPQRSTUV), vget_high_s16(vk1xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16()
230 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi2xOPQRSTUV), vget_high_s16(vk2xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16()
248 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi3xOPQRSTUV), vget_high_s16(vk3xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16()
266 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi4xOPQRSTUV), vget_high_s16(vk4xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16()
284 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi5xOPQRSTUV), vget_high_s16(vk5xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16()
302 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi6xOPQRSTUV), vget_high_s16(vk6xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16()
320 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi7xOPQRSTUV), vget_high_s16(vk7xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16()
338 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi8xOPQRSTUV), vget_high_s16(vk8xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16()
[all …]
Dup32x9-minmax-rndnu-neon-mul16.c96 int32x4_t vaccSTUV = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16() local
115 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi0xOPQRSTUV), vget_high_s16(vk0xOPQRSTUV)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16()
133 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi1xOPQRSTUV), vget_high_s16(vk1xOPQRSTUV)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16()
151 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi2xOPQRSTUV), vget_high_s16(vk2xOPQRSTUV)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16()
169 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi3xOPQRSTUV), vget_high_s16(vk3xOPQRSTUV)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16()
187 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi4xOPQRSTUV), vget_high_s16(vk4xOPQRSTUV)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16()
205 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi5xOPQRSTUV), vget_high_s16(vk5xOPQRSTUV)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16()
223 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi6xOPQRSTUV), vget_high_s16(vk6xOPQRSTUV)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16()
241 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi7xOPQRSTUV), vget_high_s16(vk7xOPQRSTUV)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16()
259 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi8xOPQRSTUV), vget_high_s16(vk8xOPQRSTUV)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16()
[all …]
Dup32x25-minmax-fp32-neonv8-mul16.c175 int32x4_t vaccSTUV = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16() local
194 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi0xOPQRSTUV), vget_high_s16(vk0xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16()
212 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi1xOPQRSTUV), vget_high_s16(vk1xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16()
230 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi2xOPQRSTUV), vget_high_s16(vk2xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16()
248 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi3xOPQRSTUV), vget_high_s16(vk3xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16()
266 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi4xOPQRSTUV), vget_high_s16(vk4xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16()
284 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi5xOPQRSTUV), vget_high_s16(vk5xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16()
302 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi6xOPQRSTUV), vget_high_s16(vk6xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16()
320 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi7xOPQRSTUV), vget_high_s16(vk7xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16()
338 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi8xOPQRSTUV), vget_high_s16(vk8xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16()
[all …]
Dup32x9-minmax-fp32-neon-mul16.c95 int32x4_t vaccSTUV = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16() local
114 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi0xOPQRSTUV), vget_high_s16(vk0xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16()
132 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi1xOPQRSTUV), vget_high_s16(vk1xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16()
150 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi2xOPQRSTUV), vget_high_s16(vk2xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16()
168 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi3xOPQRSTUV), vget_high_s16(vk3xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16()
186 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi4xOPQRSTUV), vget_high_s16(vk4xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16()
204 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi5xOPQRSTUV), vget_high_s16(vk5xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16()
222 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi6xOPQRSTUV), vget_high_s16(vk6xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16()
240 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi7xOPQRSTUV), vget_high_s16(vk7xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16()
258 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi8xOPQRSTUV), vget_high_s16(vk8xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16()
[all …]
Dup32x9-minmax-fp32-neonv8-mul16.c95 int32x4_t vaccSTUV = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16() local
114 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi0xOPQRSTUV), vget_high_s16(vk0xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16()
132 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi1xOPQRSTUV), vget_high_s16(vk1xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16()
150 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi2xOPQRSTUV), vget_high_s16(vk2xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16()
168 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi3xOPQRSTUV), vget_high_s16(vk3xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16()
186 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi4xOPQRSTUV), vget_high_s16(vk4xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16()
204 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi5xOPQRSTUV), vget_high_s16(vk5xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16()
222 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi6xOPQRSTUV), vget_high_s16(vk6xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16()
240 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi7xOPQRSTUV), vget_high_s16(vk7xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16()
258 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi8xOPQRSTUV), vget_high_s16(vk8xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16()
[all …]
/external/XNNPACK/src/qc8-dwconv/gen/
Dup32x25-minmax-fp32-neon-mul16.c174 int32x4_t vaccSTUV = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16() local
193 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi0xOPQRSTUV), vget_high_s16(vk0xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16()
211 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi1xOPQRSTUV), vget_high_s16(vk1xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16()
229 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi2xOPQRSTUV), vget_high_s16(vk2xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16()
247 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi3xOPQRSTUV), vget_high_s16(vk3xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16()
265 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi4xOPQRSTUV), vget_high_s16(vk4xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16()
283 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi5xOPQRSTUV), vget_high_s16(vk5xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16()
301 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi6xOPQRSTUV), vget_high_s16(vk6xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16()
319 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi7xOPQRSTUV), vget_high_s16(vk7xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16()
337 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi8xOPQRSTUV), vget_high_s16(vk8xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16()
[all …]
Dup32x25-minmax-fp32-neonv8-mul16.c174 int32x4_t vaccSTUV = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16() local
193 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi0xOPQRSTUV), vget_high_s16(vk0xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16()
211 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi1xOPQRSTUV), vget_high_s16(vk1xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16()
229 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi2xOPQRSTUV), vget_high_s16(vk2xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16()
247 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi3xOPQRSTUV), vget_high_s16(vk3xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16()
265 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi4xOPQRSTUV), vget_high_s16(vk4xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16()
283 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi5xOPQRSTUV), vget_high_s16(vk5xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16()
301 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi6xOPQRSTUV), vget_high_s16(vk6xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16()
319 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi7xOPQRSTUV), vget_high_s16(vk7xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16()
337 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi8xOPQRSTUV), vget_high_s16(vk8xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16()
[all …]
Dup32x9-minmax-fp32-neon-mul16.c94 int32x4_t vaccSTUV = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16() local
113 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi0xOPQRSTUV), vget_high_s16(vk0xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16()
131 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi1xOPQRSTUV), vget_high_s16(vk1xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16()
149 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi2xOPQRSTUV), vget_high_s16(vk2xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16()
167 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi3xOPQRSTUV), vget_high_s16(vk3xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16()
185 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi4xOPQRSTUV), vget_high_s16(vk4xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16()
203 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi5xOPQRSTUV), vget_high_s16(vk5xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16()
221 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi6xOPQRSTUV), vget_high_s16(vk6xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16()
239 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi7xOPQRSTUV), vget_high_s16(vk7xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16()
257 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi8xOPQRSTUV), vget_high_s16(vk8xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16()
[all …]
Dup32x9-minmax-fp32-neonv8-mul16.c94 int32x4_t vaccSTUV = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16() local
113 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi0xOPQRSTUV), vget_high_s16(vk0xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16()
131 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi1xOPQRSTUV), vget_high_s16(vk1xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16()
149 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi2xOPQRSTUV), vget_high_s16(vk2xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16()
167 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi3xOPQRSTUV), vget_high_s16(vk3xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16()
185 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi4xOPQRSTUV), vget_high_s16(vk4xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16()
203 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi5xOPQRSTUV), vget_high_s16(vk5xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16()
221 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi6xOPQRSTUV), vget_high_s16(vk6xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16()
239 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi7xOPQRSTUV), vget_high_s16(vk7xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16()
257 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi8xOPQRSTUV), vget_high_s16(vk8xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16()
[all …]
/external/XNNPACK/src/qu8-dwconv/gen/
Dup32x25-minmax-fp32-neonv8-mul16.c176 int32x4_t vaccSTUV = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16() local
195 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi0xOPQRSTUV), vget_high_s16(vk0xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16()
213 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi1xOPQRSTUV), vget_high_s16(vk1xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16()
231 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi2xOPQRSTUV), vget_high_s16(vk2xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16()
249 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi3xOPQRSTUV), vget_high_s16(vk3xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16()
267 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi4xOPQRSTUV), vget_high_s16(vk4xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16()
285 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi5xOPQRSTUV), vget_high_s16(vk5xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16()
303 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi6xOPQRSTUV), vget_high_s16(vk6xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16()
321 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi7xOPQRSTUV), vget_high_s16(vk7xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16()
339 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi8xOPQRSTUV), vget_high_s16(vk8xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16()
[all …]
Dup32x25-minmax-rndnu-neon-mul8.c179 int32x4_t vaccSTUV = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul8() local
203vaccSTUV = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vaccSTUV), vget_high_u16(vprodOPQ… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul8()
229vaccSTUV = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vaccSTUV), vget_high_u16(vprodOPQ… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul8()
255vaccSTUV = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vaccSTUV), vget_high_u16(vprodOPQ… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul8()
281vaccSTUV = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vaccSTUV), vget_high_u16(vprodOPQ… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul8()
307vaccSTUV = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vaccSTUV), vget_high_u16(vprodOPQ… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul8()
333vaccSTUV = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vaccSTUV), vget_high_u16(vprodOPQ… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul8()
359vaccSTUV = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vaccSTUV), vget_high_u16(vprodOPQ… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul8()
385vaccSTUV = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vaccSTUV), vget_high_u16(vprodOPQ… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul8()
411vaccSTUV = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vaccSTUV), vget_high_u16(vprodOPQ… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul8()
[all …]
Dup32x9-minmax-rndnu-neon-mul8.c99 int32x4_t vaccSTUV = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8() local
123vaccSTUV = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vaccSTUV), vget_high_u16(vprodOPQ… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8()
149vaccSTUV = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vaccSTUV), vget_high_u16(vprodOPQ… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8()
175vaccSTUV = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vaccSTUV), vget_high_u16(vprodOPQ… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8()
201vaccSTUV = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vaccSTUV), vget_high_u16(vprodOPQ… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8()
227vaccSTUV = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vaccSTUV), vget_high_u16(vprodOPQ… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8()
253vaccSTUV = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vaccSTUV), vget_high_u16(vprodOPQ… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8()
279vaccSTUV = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vaccSTUV), vget_high_u16(vprodOPQ… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8()
305vaccSTUV = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vaccSTUV), vget_high_u16(vprodOPQ… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8()
331vaccSTUV = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vaccSTUV), vget_high_u16(vprodOPQ… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8()
[all …]
Dup32x9-minmax-rndnu-neon-mul16.c97 int32x4_t vaccSTUV = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16() local
116 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi0xOPQRSTUV), vget_high_s16(vk0xOPQRSTUV)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16()
134 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi1xOPQRSTUV), vget_high_s16(vk1xOPQRSTUV)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16()
152 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi2xOPQRSTUV), vget_high_s16(vk2xOPQRSTUV)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16()
170 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi3xOPQRSTUV), vget_high_s16(vk3xOPQRSTUV)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16()
188 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi4xOPQRSTUV), vget_high_s16(vk4xOPQRSTUV)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16()
206 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi5xOPQRSTUV), vget_high_s16(vk5xOPQRSTUV)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16()
224 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi6xOPQRSTUV), vget_high_s16(vk6xOPQRSTUV)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16()
242 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi7xOPQRSTUV), vget_high_s16(vk7xOPQRSTUV)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16()
260 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi8xOPQRSTUV), vget_high_s16(vk8xOPQRSTUV)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16()
[all …]
Dup32x25-minmax-rndnu-neon-mul16.c177 int32x4_t vaccSTUV = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16() local
196 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi0xOPQRSTUV), vget_high_s16(vk0xOPQRSTUV)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16()
214 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi1xOPQRSTUV), vget_high_s16(vk1xOPQRSTUV)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16()
232 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi2xOPQRSTUV), vget_high_s16(vk2xOPQRSTUV)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16()
250 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi3xOPQRSTUV), vget_high_s16(vk3xOPQRSTUV)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16()
268 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi4xOPQRSTUV), vget_high_s16(vk4xOPQRSTUV)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16()
286 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi5xOPQRSTUV), vget_high_s16(vk5xOPQRSTUV)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16()
304 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi6xOPQRSTUV), vget_high_s16(vk6xOPQRSTUV)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16()
322 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi7xOPQRSTUV), vget_high_s16(vk7xOPQRSTUV)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16()
340 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi8xOPQRSTUV), vget_high_s16(vk8xOPQRSTUV)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16()
[all …]
Dup32x25-minmax-fp32-neon-mul16.c176 int32x4_t vaccSTUV = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16() local
195 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi0xOPQRSTUV), vget_high_s16(vk0xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16()
213 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi1xOPQRSTUV), vget_high_s16(vk1xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16()
231 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi2xOPQRSTUV), vget_high_s16(vk2xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16()
249 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi3xOPQRSTUV), vget_high_s16(vk3xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16()
267 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi4xOPQRSTUV), vget_high_s16(vk4xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16()
285 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi5xOPQRSTUV), vget_high_s16(vk5xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16()
303 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi6xOPQRSTUV), vget_high_s16(vk6xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16()
321 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi7xOPQRSTUV), vget_high_s16(vk7xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16()
339 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi8xOPQRSTUV), vget_high_s16(vk8xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16()
[all …]
Dup32x9-minmax-fp32-neon-mul16.c96 int32x4_t vaccSTUV = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16() local
115 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi0xOPQRSTUV), vget_high_s16(vk0xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16()
133 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi1xOPQRSTUV), vget_high_s16(vk1xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16()
151 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi2xOPQRSTUV), vget_high_s16(vk2xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16()
169 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi3xOPQRSTUV), vget_high_s16(vk3xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16()
187 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi4xOPQRSTUV), vget_high_s16(vk4xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16()
205 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi5xOPQRSTUV), vget_high_s16(vk5xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16()
223 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi6xOPQRSTUV), vget_high_s16(vk6xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16()
241 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi7xOPQRSTUV), vget_high_s16(vk7xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16()
259 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi8xOPQRSTUV), vget_high_s16(vk8xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16()
[all …]
Dup32x9-minmax-fp32-neonv8-mul16.c96 int32x4_t vaccSTUV = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16() local
115 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi0xOPQRSTUV), vget_high_s16(vk0xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16()
133 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi1xOPQRSTUV), vget_high_s16(vk1xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16()
151 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi2xOPQRSTUV), vget_high_s16(vk2xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16()
169 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi3xOPQRSTUV), vget_high_s16(vk3xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16()
187 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi4xOPQRSTUV), vget_high_s16(vk4xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16()
205 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi5xOPQRSTUV), vget_high_s16(vk5xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16()
223 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi6xOPQRSTUV), vget_high_s16(vk6xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16()
241 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi7xOPQRSTUV), vget_high_s16(vk7xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16()
259 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi8xOPQRSTUV), vget_high_s16(vk8xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16()
[all …]
/external/XNNPACK/src/qs8-gavgpool/gen/
D7x-minmax-fp32-wasmsimd-c32.c133 v128_t vaccSTUV = wasm_i32x4_add(vinit_bias, wasm_i32x4_extend_high_i16x8(vaccOPQRSTUV)); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32() local
142 vaccSTUV = wasm_f32x4_convert_i32x4(vaccSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32()
151 vaccSTUV = wasm_f32x4_mul(vaccSTUV, vscale); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32()
160 vaccSTUV = wasm_f32x4_add(vaccSTUV, vmagic_bias); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32()
169 vaccSTUV = wasm_i32x4_max(vaccSTUV, vmagic_min); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32()
178 vaccSTUV = wasm_i32x4_sub(vaccSTUV, vmagic_bias_less_output_zero_point); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32()
183 v128_t voutOPQRSTUV = wasm_i16x8_narrow_i32x4(vaccOPQR, vaccSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32()
D7p7x-minmax-fp32-wasmsimd-c32.c114 const v128_t vaccSTUV = wasm_i32x4_add(vinit_bias, wasm_i32x4_extend_high_i16x8(vaccOPQRSTUV)); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32() local
123 wasm_v128_store(b + 28, vaccSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32()
245 v128_t vaccSTUV = wasm_v128_load(b + 28); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32() local
254 vaccSTUV = wasm_i32x4_add(vaccSTUV, wasm_i32x4_extend_high_i16x8(vaccOPQRSTUV)); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32()
263 wasm_v128_store(b + 28, vaccSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32()
409 v128_t vaccSTUV = wasm_v128_load(buffer + 28); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32() local
419 vaccSTUV = wasm_i32x4_add(vaccSTUV, wasm_i32x4_extend_high_i16x8(vaccOPQRSTUV)); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32()
428 vaccSTUV = wasm_f32x4_convert_i32x4(vaccSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32()
437 vaccSTUV = wasm_f32x4_mul(vaccSTUV, vscale); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32()
446 vaccSTUV = wasm_f32x4_add(vaccSTUV, vmagic_bias); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32()
[all …]
D7p7x-minmax-rndnu-neon-c32.c106 const int32x4_t vaccSTUV = vaddw_s16(vinit_bias, vget_high_s16(vsumOPQRSTUV)); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c32() local
115 vst1q_s32(b, vaccSTUV); b += 4; in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c32()
216 int32x4_t vaccSTUV = vld1q_s32(b + 28); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c32() local
226 vaccSTUV = vaddw_s16(vaccSTUV, vget_high_s16(vsumOPQRSTUV)); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c32()
235 vst1q_s32(b, vaccSTUV); b += 4; in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c32()
360 int32x4_t vaccSTUV = vld1q_s32(buffer); buffer += 4; in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c32() local
370 vaccSTUV = vaddw_s16(vaccSTUV, vget_high_s16(vsumOPQRSTUV)); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c32()
379 vaccSTUV = vqshlq_s32(vaccSTUV, vleft_pre_shift); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c32()
388 vaccSTUV = vqdmulhq_s32(vaccSTUV, vmultiplier); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c32()
397 vaccSTUV = vrshlq_s32(vaccSTUV, vleft_post_shift); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c32()
[all …]
/external/XNNPACK/src/qu8-gavgpool/gen/
D7x-minmax-fp32-wasmsimd-c32.c133 v128_t vaccSTUV = wasm_i32x4_add(vinit_bias, wasm_u32x4_extend_high_u16x8(vaccOPQRSTUV)); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32() local
142 vaccSTUV = wasm_f32x4_convert_i32x4(vaccSTUV); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32()
151 vaccSTUV = wasm_f32x4_mul(vaccSTUV, vscale); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32()
160 vaccSTUV = wasm_f32x4_add(vaccSTUV, vmagic_bias); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32()
169 vaccSTUV = wasm_i32x4_max(vaccSTUV, vmagic_min); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32()
178 vaccSTUV = wasm_i32x4_sub(vaccSTUV, vmagic_bias_less_output_zero_point); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32()
183 v128_t voutOPQRSTUV = wasm_i16x8_narrow_i32x4(vaccOPQR, vaccSTUV); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32()
D7p7x-minmax-fp32-wasmsimd-c32.c114 const v128_t vaccSTUV = wasm_i32x4_add(vinit_bias, wasm_u32x4_extend_high_u16x8(vaccOPQRSTUV)); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32() local
123 wasm_v128_store(b + 28, vaccSTUV); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32()
245 v128_t vaccSTUV = wasm_v128_load(b + 28); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32() local
254 vaccSTUV = wasm_i32x4_add(vaccSTUV, wasm_u32x4_extend_high_u16x8(vaccOPQRSTUV)); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32()
263 wasm_v128_store(b + 28, vaccSTUV); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32()
409 v128_t vaccSTUV = wasm_v128_load(buffer + 28); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32() local
419 vaccSTUV = wasm_i32x4_add(vaccSTUV, wasm_u32x4_extend_high_u16x8(vaccOPQRSTUV)); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32()
428 vaccSTUV = wasm_f32x4_convert_i32x4(vaccSTUV); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32()
437 vaccSTUV = wasm_f32x4_mul(vaccSTUV, vscale); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32()
446 vaccSTUV = wasm_f32x4_add(vaccSTUV, vmagic_bias); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32()
[all …]
/external/XNNPACK/src/qs8-vadd/gen/
Dminmax-wasmsimd-x32.c51 …v128_t vaccSTUV = wasm_i32x4_add(vbias, wasm_i32x4_mul(wasm_i32x4_extend_high_i16x8(vaOPQRSTUV), v… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32() local
60vaccSTUV = wasm_i32x4_add(vaccSTUV, wasm_i32x4_mul(wasm_i32x4_extend_high_i16x8(vbOPQRSTUV), vb_mu… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32()
69 vaccSTUV = wasm_i32x4_shr(vaccSTUV, vshift); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32()
74 …v128_t voutOPQRSTUV = wasm_i16x8_add_sat(wasm_i16x8_narrow_i32x4(vaccOPQR, vaccSTUV), voutput_zero… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32()
/external/XNNPACK/src/qu8-vadd/gen/
Dminmax-wasmsimd-x32.c51 …v128_t vaccSTUV = wasm_i32x4_add(vbias, wasm_i32x4_mul(wasm_u32x4_extend_high_u16x8(vaOPQRSTUV), v… in xnn_qu8_vadd_minmax_ukernel__wasmsimd_x32() local
60vaccSTUV = wasm_i32x4_add(vaccSTUV, wasm_i32x4_mul(wasm_u32x4_extend_high_u16x8(vbOPQRSTUV), vb_mu… in xnn_qu8_vadd_minmax_ukernel__wasmsimd_x32()
69 vaccSTUV = wasm_i32x4_shr(vaccSTUV, vshift); in xnn_qu8_vadd_minmax_ukernel__wasmsimd_x32()
74 …v128_t voutOPQRSTUV = wasm_i16x8_add_sat(wasm_i16x8_narrow_i32x4(vaccOPQR, vaccSTUV), voutput_zero… in xnn_qu8_vadd_minmax_ukernel__wasmsimd_x32()

1234