/external/XNNPACK/src/qs8-dwconv/gen/ |
D | up32x25-minmax-rndnu-neon-mul16.c | 176 int32x4_t vaccSTUV = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16() local 195 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi0xOPQRSTUV), vget_high_s16(vk0xOPQRSTUV)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16() 213 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi1xOPQRSTUV), vget_high_s16(vk1xOPQRSTUV)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16() 231 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi2xOPQRSTUV), vget_high_s16(vk2xOPQRSTUV)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16() 249 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi3xOPQRSTUV), vget_high_s16(vk3xOPQRSTUV)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16() 267 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi4xOPQRSTUV), vget_high_s16(vk4xOPQRSTUV)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16() 285 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi5xOPQRSTUV), vget_high_s16(vk5xOPQRSTUV)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16() 303 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi6xOPQRSTUV), vget_high_s16(vk6xOPQRSTUV)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16() 321 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi7xOPQRSTUV), vget_high_s16(vk7xOPQRSTUV)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16() 339 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi8xOPQRSTUV), vget_high_s16(vk8xOPQRSTUV)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16() [all …]
|
D | up32x25-minmax-fp32-neon-mul16.c | 175 int32x4_t vaccSTUV = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16() local 194 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi0xOPQRSTUV), vget_high_s16(vk0xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16() 212 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi1xOPQRSTUV), vget_high_s16(vk1xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16() 230 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi2xOPQRSTUV), vget_high_s16(vk2xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16() 248 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi3xOPQRSTUV), vget_high_s16(vk3xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16() 266 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi4xOPQRSTUV), vget_high_s16(vk4xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16() 284 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi5xOPQRSTUV), vget_high_s16(vk5xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16() 302 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi6xOPQRSTUV), vget_high_s16(vk6xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16() 320 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi7xOPQRSTUV), vget_high_s16(vk7xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16() 338 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi8xOPQRSTUV), vget_high_s16(vk8xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16() [all …]
|
D | up32x9-minmax-rndnu-neon-mul16.c | 96 int32x4_t vaccSTUV = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16() local 115 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi0xOPQRSTUV), vget_high_s16(vk0xOPQRSTUV)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16() 133 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi1xOPQRSTUV), vget_high_s16(vk1xOPQRSTUV)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16() 151 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi2xOPQRSTUV), vget_high_s16(vk2xOPQRSTUV)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16() 169 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi3xOPQRSTUV), vget_high_s16(vk3xOPQRSTUV)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16() 187 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi4xOPQRSTUV), vget_high_s16(vk4xOPQRSTUV)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16() 205 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi5xOPQRSTUV), vget_high_s16(vk5xOPQRSTUV)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16() 223 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi6xOPQRSTUV), vget_high_s16(vk6xOPQRSTUV)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16() 241 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi7xOPQRSTUV), vget_high_s16(vk7xOPQRSTUV)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16() 259 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi8xOPQRSTUV), vget_high_s16(vk8xOPQRSTUV)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16() [all …]
|
D | up32x25-minmax-fp32-neonv8-mul16.c | 175 int32x4_t vaccSTUV = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16() local 194 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi0xOPQRSTUV), vget_high_s16(vk0xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16() 212 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi1xOPQRSTUV), vget_high_s16(vk1xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16() 230 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi2xOPQRSTUV), vget_high_s16(vk2xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16() 248 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi3xOPQRSTUV), vget_high_s16(vk3xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16() 266 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi4xOPQRSTUV), vget_high_s16(vk4xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16() 284 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi5xOPQRSTUV), vget_high_s16(vk5xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16() 302 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi6xOPQRSTUV), vget_high_s16(vk6xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16() 320 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi7xOPQRSTUV), vget_high_s16(vk7xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16() 338 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi8xOPQRSTUV), vget_high_s16(vk8xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16() [all …]
|
D | up32x9-minmax-fp32-neon-mul16.c | 95 int32x4_t vaccSTUV = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16() local 114 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi0xOPQRSTUV), vget_high_s16(vk0xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16() 132 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi1xOPQRSTUV), vget_high_s16(vk1xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16() 150 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi2xOPQRSTUV), vget_high_s16(vk2xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16() 168 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi3xOPQRSTUV), vget_high_s16(vk3xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16() 186 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi4xOPQRSTUV), vget_high_s16(vk4xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16() 204 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi5xOPQRSTUV), vget_high_s16(vk5xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16() 222 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi6xOPQRSTUV), vget_high_s16(vk6xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16() 240 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi7xOPQRSTUV), vget_high_s16(vk7xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16() 258 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi8xOPQRSTUV), vget_high_s16(vk8xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16() [all …]
|
D | up32x9-minmax-fp32-neonv8-mul16.c | 95 int32x4_t vaccSTUV = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16() local 114 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi0xOPQRSTUV), vget_high_s16(vk0xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16() 132 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi1xOPQRSTUV), vget_high_s16(vk1xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16() 150 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi2xOPQRSTUV), vget_high_s16(vk2xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16() 168 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi3xOPQRSTUV), vget_high_s16(vk3xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16() 186 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi4xOPQRSTUV), vget_high_s16(vk4xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16() 204 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi5xOPQRSTUV), vget_high_s16(vk5xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16() 222 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi6xOPQRSTUV), vget_high_s16(vk6xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16() 240 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi7xOPQRSTUV), vget_high_s16(vk7xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16() 258 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi8xOPQRSTUV), vget_high_s16(vk8xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16() [all …]
|
/external/XNNPACK/src/qc8-dwconv/gen/ |
D | up32x25-minmax-fp32-neon-mul16.c | 174 int32x4_t vaccSTUV = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16() local 193 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi0xOPQRSTUV), vget_high_s16(vk0xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16() 211 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi1xOPQRSTUV), vget_high_s16(vk1xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16() 229 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi2xOPQRSTUV), vget_high_s16(vk2xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16() 247 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi3xOPQRSTUV), vget_high_s16(vk3xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16() 265 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi4xOPQRSTUV), vget_high_s16(vk4xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16() 283 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi5xOPQRSTUV), vget_high_s16(vk5xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16() 301 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi6xOPQRSTUV), vget_high_s16(vk6xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16() 319 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi7xOPQRSTUV), vget_high_s16(vk7xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16() 337 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi8xOPQRSTUV), vget_high_s16(vk8xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16() [all …]
|
D | up32x25-minmax-fp32-neonv8-mul16.c | 174 int32x4_t vaccSTUV = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16() local 193 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi0xOPQRSTUV), vget_high_s16(vk0xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16() 211 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi1xOPQRSTUV), vget_high_s16(vk1xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16() 229 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi2xOPQRSTUV), vget_high_s16(vk2xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16() 247 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi3xOPQRSTUV), vget_high_s16(vk3xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16() 265 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi4xOPQRSTUV), vget_high_s16(vk4xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16() 283 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi5xOPQRSTUV), vget_high_s16(vk5xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16() 301 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi6xOPQRSTUV), vget_high_s16(vk6xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16() 319 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi7xOPQRSTUV), vget_high_s16(vk7xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16() 337 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi8xOPQRSTUV), vget_high_s16(vk8xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16() [all …]
|
D | up32x9-minmax-fp32-neon-mul16.c | 94 int32x4_t vaccSTUV = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16() local 113 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi0xOPQRSTUV), vget_high_s16(vk0xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16() 131 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi1xOPQRSTUV), vget_high_s16(vk1xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16() 149 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi2xOPQRSTUV), vget_high_s16(vk2xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16() 167 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi3xOPQRSTUV), vget_high_s16(vk3xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16() 185 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi4xOPQRSTUV), vget_high_s16(vk4xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16() 203 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi5xOPQRSTUV), vget_high_s16(vk5xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16() 221 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi6xOPQRSTUV), vget_high_s16(vk6xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16() 239 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi7xOPQRSTUV), vget_high_s16(vk7xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16() 257 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi8xOPQRSTUV), vget_high_s16(vk8xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16() [all …]
|
D | up32x9-minmax-fp32-neonv8-mul16.c | 94 int32x4_t vaccSTUV = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16() local 113 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi0xOPQRSTUV), vget_high_s16(vk0xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16() 131 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi1xOPQRSTUV), vget_high_s16(vk1xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16() 149 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi2xOPQRSTUV), vget_high_s16(vk2xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16() 167 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi3xOPQRSTUV), vget_high_s16(vk3xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16() 185 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi4xOPQRSTUV), vget_high_s16(vk4xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16() 203 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi5xOPQRSTUV), vget_high_s16(vk5xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16() 221 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi6xOPQRSTUV), vget_high_s16(vk6xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16() 239 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi7xOPQRSTUV), vget_high_s16(vk7xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16() 257 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi8xOPQRSTUV), vget_high_s16(vk8xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16() [all …]
|
/external/XNNPACK/src/qu8-dwconv/gen/ |
D | up32x25-minmax-fp32-neonv8-mul16.c | 176 int32x4_t vaccSTUV = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16() local 195 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi0xOPQRSTUV), vget_high_s16(vk0xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16() 213 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi1xOPQRSTUV), vget_high_s16(vk1xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16() 231 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi2xOPQRSTUV), vget_high_s16(vk2xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16() 249 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi3xOPQRSTUV), vget_high_s16(vk3xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16() 267 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi4xOPQRSTUV), vget_high_s16(vk4xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16() 285 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi5xOPQRSTUV), vget_high_s16(vk5xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16() 303 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi6xOPQRSTUV), vget_high_s16(vk6xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16() 321 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi7xOPQRSTUV), vget_high_s16(vk7xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16() 339 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi8xOPQRSTUV), vget_high_s16(vk8xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16() [all …]
|
D | up32x25-minmax-rndnu-neon-mul8.c | 179 int32x4_t vaccSTUV = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul8() local 203 …vaccSTUV = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vaccSTUV), vget_high_u16(vprodOPQ… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul8() 229 …vaccSTUV = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vaccSTUV), vget_high_u16(vprodOPQ… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul8() 255 …vaccSTUV = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vaccSTUV), vget_high_u16(vprodOPQ… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul8() 281 …vaccSTUV = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vaccSTUV), vget_high_u16(vprodOPQ… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul8() 307 …vaccSTUV = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vaccSTUV), vget_high_u16(vprodOPQ… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul8() 333 …vaccSTUV = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vaccSTUV), vget_high_u16(vprodOPQ… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul8() 359 …vaccSTUV = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vaccSTUV), vget_high_u16(vprodOPQ… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul8() 385 …vaccSTUV = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vaccSTUV), vget_high_u16(vprodOPQ… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul8() 411 …vaccSTUV = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vaccSTUV), vget_high_u16(vprodOPQ… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul8() [all …]
|
D | up32x9-minmax-rndnu-neon-mul8.c | 99 int32x4_t vaccSTUV = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8() local 123 …vaccSTUV = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vaccSTUV), vget_high_u16(vprodOPQ… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8() 149 …vaccSTUV = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vaccSTUV), vget_high_u16(vprodOPQ… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8() 175 …vaccSTUV = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vaccSTUV), vget_high_u16(vprodOPQ… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8() 201 …vaccSTUV = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vaccSTUV), vget_high_u16(vprodOPQ… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8() 227 …vaccSTUV = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vaccSTUV), vget_high_u16(vprodOPQ… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8() 253 …vaccSTUV = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vaccSTUV), vget_high_u16(vprodOPQ… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8() 279 …vaccSTUV = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vaccSTUV), vget_high_u16(vprodOPQ… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8() 305 …vaccSTUV = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vaccSTUV), vget_high_u16(vprodOPQ… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8() 331 …vaccSTUV = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vaccSTUV), vget_high_u16(vprodOPQ… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8() [all …]
|
D | up32x9-minmax-rndnu-neon-mul16.c | 97 int32x4_t vaccSTUV = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16() local 116 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi0xOPQRSTUV), vget_high_s16(vk0xOPQRSTUV)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16() 134 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi1xOPQRSTUV), vget_high_s16(vk1xOPQRSTUV)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16() 152 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi2xOPQRSTUV), vget_high_s16(vk2xOPQRSTUV)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16() 170 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi3xOPQRSTUV), vget_high_s16(vk3xOPQRSTUV)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16() 188 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi4xOPQRSTUV), vget_high_s16(vk4xOPQRSTUV)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16() 206 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi5xOPQRSTUV), vget_high_s16(vk5xOPQRSTUV)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16() 224 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi6xOPQRSTUV), vget_high_s16(vk6xOPQRSTUV)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16() 242 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi7xOPQRSTUV), vget_high_s16(vk7xOPQRSTUV)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16() 260 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi8xOPQRSTUV), vget_high_s16(vk8xOPQRSTUV)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16() [all …]
|
D | up32x25-minmax-rndnu-neon-mul16.c | 177 int32x4_t vaccSTUV = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16() local 196 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi0xOPQRSTUV), vget_high_s16(vk0xOPQRSTUV)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16() 214 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi1xOPQRSTUV), vget_high_s16(vk1xOPQRSTUV)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16() 232 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi2xOPQRSTUV), vget_high_s16(vk2xOPQRSTUV)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16() 250 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi3xOPQRSTUV), vget_high_s16(vk3xOPQRSTUV)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16() 268 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi4xOPQRSTUV), vget_high_s16(vk4xOPQRSTUV)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16() 286 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi5xOPQRSTUV), vget_high_s16(vk5xOPQRSTUV)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16() 304 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi6xOPQRSTUV), vget_high_s16(vk6xOPQRSTUV)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16() 322 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi7xOPQRSTUV), vget_high_s16(vk7xOPQRSTUV)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16() 340 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi8xOPQRSTUV), vget_high_s16(vk8xOPQRSTUV)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16() [all …]
|
D | up32x25-minmax-fp32-neon-mul16.c | 176 int32x4_t vaccSTUV = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16() local 195 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi0xOPQRSTUV), vget_high_s16(vk0xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16() 213 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi1xOPQRSTUV), vget_high_s16(vk1xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16() 231 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi2xOPQRSTUV), vget_high_s16(vk2xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16() 249 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi3xOPQRSTUV), vget_high_s16(vk3xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16() 267 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi4xOPQRSTUV), vget_high_s16(vk4xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16() 285 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi5xOPQRSTUV), vget_high_s16(vk5xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16() 303 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi6xOPQRSTUV), vget_high_s16(vk6xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16() 321 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi7xOPQRSTUV), vget_high_s16(vk7xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16() 339 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi8xOPQRSTUV), vget_high_s16(vk8xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16() [all …]
|
D | up32x9-minmax-fp32-neon-mul16.c | 96 int32x4_t vaccSTUV = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16() local 115 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi0xOPQRSTUV), vget_high_s16(vk0xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16() 133 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi1xOPQRSTUV), vget_high_s16(vk1xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16() 151 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi2xOPQRSTUV), vget_high_s16(vk2xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16() 169 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi3xOPQRSTUV), vget_high_s16(vk3xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16() 187 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi4xOPQRSTUV), vget_high_s16(vk4xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16() 205 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi5xOPQRSTUV), vget_high_s16(vk5xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16() 223 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi6xOPQRSTUV), vget_high_s16(vk6xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16() 241 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi7xOPQRSTUV), vget_high_s16(vk7xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16() 259 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi8xOPQRSTUV), vget_high_s16(vk8xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16() [all …]
|
D | up32x9-minmax-fp32-neonv8-mul16.c | 96 int32x4_t vaccSTUV = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16() local 115 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi0xOPQRSTUV), vget_high_s16(vk0xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16() 133 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi1xOPQRSTUV), vget_high_s16(vk1xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16() 151 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi2xOPQRSTUV), vget_high_s16(vk2xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16() 169 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi3xOPQRSTUV), vget_high_s16(vk3xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16() 187 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi4xOPQRSTUV), vget_high_s16(vk4xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16() 205 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi5xOPQRSTUV), vget_high_s16(vk5xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16() 223 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi6xOPQRSTUV), vget_high_s16(vk6xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16() 241 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi7xOPQRSTUV), vget_high_s16(vk7xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16() 259 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi8xOPQRSTUV), vget_high_s16(vk8xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16() [all …]
|
/external/XNNPACK/src/qs8-gavgpool/gen/ |
D | 7x-minmax-fp32-wasmsimd-c32.c | 133 v128_t vaccSTUV = wasm_i32x4_add(vinit_bias, wasm_i32x4_extend_high_i16x8(vaccOPQRSTUV)); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32() local 142 vaccSTUV = wasm_f32x4_convert_i32x4(vaccSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32() 151 vaccSTUV = wasm_f32x4_mul(vaccSTUV, vscale); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32() 160 vaccSTUV = wasm_f32x4_add(vaccSTUV, vmagic_bias); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32() 169 vaccSTUV = wasm_i32x4_max(vaccSTUV, vmagic_min); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32() 178 vaccSTUV = wasm_i32x4_sub(vaccSTUV, vmagic_bias_less_output_zero_point); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32() 183 v128_t voutOPQRSTUV = wasm_i16x8_narrow_i32x4(vaccOPQR, vaccSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32()
|
D | 7p7x-minmax-fp32-wasmsimd-c32.c | 114 const v128_t vaccSTUV = wasm_i32x4_add(vinit_bias, wasm_i32x4_extend_high_i16x8(vaccOPQRSTUV)); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32() local 123 wasm_v128_store(b + 28, vaccSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32() 245 v128_t vaccSTUV = wasm_v128_load(b + 28); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32() local 254 vaccSTUV = wasm_i32x4_add(vaccSTUV, wasm_i32x4_extend_high_i16x8(vaccOPQRSTUV)); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32() 263 wasm_v128_store(b + 28, vaccSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32() 409 v128_t vaccSTUV = wasm_v128_load(buffer + 28); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32() local 419 vaccSTUV = wasm_i32x4_add(vaccSTUV, wasm_i32x4_extend_high_i16x8(vaccOPQRSTUV)); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32() 428 vaccSTUV = wasm_f32x4_convert_i32x4(vaccSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32() 437 vaccSTUV = wasm_f32x4_mul(vaccSTUV, vscale); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32() 446 vaccSTUV = wasm_f32x4_add(vaccSTUV, vmagic_bias); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32() [all …]
|
D | 7p7x-minmax-rndnu-neon-c32.c | 106 const int32x4_t vaccSTUV = vaddw_s16(vinit_bias, vget_high_s16(vsumOPQRSTUV)); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c32() local 115 vst1q_s32(b, vaccSTUV); b += 4; in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c32() 216 int32x4_t vaccSTUV = vld1q_s32(b + 28); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c32() local 226 vaccSTUV = vaddw_s16(vaccSTUV, vget_high_s16(vsumOPQRSTUV)); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c32() 235 vst1q_s32(b, vaccSTUV); b += 4; in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c32() 360 int32x4_t vaccSTUV = vld1q_s32(buffer); buffer += 4; in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c32() local 370 vaccSTUV = vaddw_s16(vaccSTUV, vget_high_s16(vsumOPQRSTUV)); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c32() 379 vaccSTUV = vqshlq_s32(vaccSTUV, vleft_pre_shift); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c32() 388 vaccSTUV = vqdmulhq_s32(vaccSTUV, vmultiplier); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c32() 397 vaccSTUV = vrshlq_s32(vaccSTUV, vleft_post_shift); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c32() [all …]
|
/external/XNNPACK/src/qu8-gavgpool/gen/ |
D | 7x-minmax-fp32-wasmsimd-c32.c | 133 v128_t vaccSTUV = wasm_i32x4_add(vinit_bias, wasm_u32x4_extend_high_u16x8(vaccOPQRSTUV)); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32() local 142 vaccSTUV = wasm_f32x4_convert_i32x4(vaccSTUV); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32() 151 vaccSTUV = wasm_f32x4_mul(vaccSTUV, vscale); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32() 160 vaccSTUV = wasm_f32x4_add(vaccSTUV, vmagic_bias); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32() 169 vaccSTUV = wasm_i32x4_max(vaccSTUV, vmagic_min); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32() 178 vaccSTUV = wasm_i32x4_sub(vaccSTUV, vmagic_bias_less_output_zero_point); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32() 183 v128_t voutOPQRSTUV = wasm_i16x8_narrow_i32x4(vaccOPQR, vaccSTUV); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32()
|
D | 7p7x-minmax-fp32-wasmsimd-c32.c | 114 const v128_t vaccSTUV = wasm_i32x4_add(vinit_bias, wasm_u32x4_extend_high_u16x8(vaccOPQRSTUV)); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32() local 123 wasm_v128_store(b + 28, vaccSTUV); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32() 245 v128_t vaccSTUV = wasm_v128_load(b + 28); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32() local 254 vaccSTUV = wasm_i32x4_add(vaccSTUV, wasm_u32x4_extend_high_u16x8(vaccOPQRSTUV)); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32() 263 wasm_v128_store(b + 28, vaccSTUV); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32() 409 v128_t vaccSTUV = wasm_v128_load(buffer + 28); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32() local 419 vaccSTUV = wasm_i32x4_add(vaccSTUV, wasm_u32x4_extend_high_u16x8(vaccOPQRSTUV)); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32() 428 vaccSTUV = wasm_f32x4_convert_i32x4(vaccSTUV); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32() 437 vaccSTUV = wasm_f32x4_mul(vaccSTUV, vscale); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32() 446 vaccSTUV = wasm_f32x4_add(vaccSTUV, vmagic_bias); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32() [all …]
|
/external/XNNPACK/src/qs8-vadd/gen/ |
D | minmax-wasmsimd-x32.c | 51 …v128_t vaccSTUV = wasm_i32x4_add(vbias, wasm_i32x4_mul(wasm_i32x4_extend_high_i16x8(vaOPQRSTUV), v… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32() local 60 …vaccSTUV = wasm_i32x4_add(vaccSTUV, wasm_i32x4_mul(wasm_i32x4_extend_high_i16x8(vbOPQRSTUV), vb_mu… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32() 69 vaccSTUV = wasm_i32x4_shr(vaccSTUV, vshift); in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32() 74 …v128_t voutOPQRSTUV = wasm_i16x8_add_sat(wasm_i16x8_narrow_i32x4(vaccOPQR, vaccSTUV), voutput_zero… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32()
|
/external/XNNPACK/src/qu8-vadd/gen/ |
D | minmax-wasmsimd-x32.c | 51 …v128_t vaccSTUV = wasm_i32x4_add(vbias, wasm_i32x4_mul(wasm_u32x4_extend_high_u16x8(vaOPQRSTUV), v… in xnn_qu8_vadd_minmax_ukernel__wasmsimd_x32() local 60 …vaccSTUV = wasm_i32x4_add(vaccSTUV, wasm_i32x4_mul(wasm_u32x4_extend_high_u16x8(vbOPQRSTUV), vb_mu… in xnn_qu8_vadd_minmax_ukernel__wasmsimd_x32() 69 vaccSTUV = wasm_i32x4_shr(vaccSTUV, vshift); in xnn_qu8_vadd_minmax_ukernel__wasmsimd_x32() 74 …v128_t voutOPQRSTUV = wasm_i16x8_add_sat(wasm_i16x8_narrow_i32x4(vaccOPQR, vaccSTUV), voutput_zero… in xnn_qu8_vadd_minmax_ukernel__wasmsimd_x32()
|