/external/XNNPACK/src/qs8-dwconv/gen/ |
D | up8x25-minmax-fp32-wasmsimd-mul16.c | 164 v128_t vacc4567 = wasm_v128_load((const void*) ((uintptr_t) w + 4 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16() local 174 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16() 183 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16() 192 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16() 201 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16() 210 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16() 219 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16() 228 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16() 237 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16() 246 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16() [all …]
|
D | up8x9-minmax-fp32-wasmsimd-mul16.c | 84 v128_t vacc4567 = wasm_v128_load((const void*) ((uintptr_t) w + 4 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16() local 94 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16() 103 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16() 112 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16() 121 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16() 130 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16() 139 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16() 148 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16() 157 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16() 166 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16() [all …]
|
D | up8x25-minmax-rndnu-neon-mul16.c | 170 int32x4_t vacc4567 = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16() local 177 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi0x01234567), vget_high_s16(vk0x01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16() 183 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi1x01234567), vget_high_s16(vk1x01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16() 189 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi2x01234567), vget_high_s16(vk2x01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16() 195 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi3x01234567), vget_high_s16(vk3x01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16() 201 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi4x01234567), vget_high_s16(vk4x01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16() 207 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi5x01234567), vget_high_s16(vk5x01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16() 213 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi6x01234567), vget_high_s16(vk6x01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16() 219 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi7x01234567), vget_high_s16(vk7x01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16() 225 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi8x01234567), vget_high_s16(vk8x01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16() [all …]
|
D | up8x25-minmax-rndnu-neon-mul8-ld64.c | 170 int32x4_t vacc4567 = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8_ld64() local 178 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8_ld64() 185 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8_ld64() 192 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8_ld64() 199 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8_ld64() 206 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8_ld64() 213 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8_ld64() 220 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8_ld64() 227 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8_ld64() 234 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8_ld64() [all …]
|
D | up8x25-minmax-fp32-neon-mul16.c | 169 int32x4_t vacc4567 = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16() local 176 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi0x01234567), vget_high_s16(vk0x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16() 182 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi1x01234567), vget_high_s16(vk1x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16() 188 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi2x01234567), vget_high_s16(vk2x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16() 194 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi3x01234567), vget_high_s16(vk3x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16() 200 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi4x01234567), vget_high_s16(vk4x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16() 206 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi5x01234567), vget_high_s16(vk5x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16() 212 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi6x01234567), vget_high_s16(vk6x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16() 218 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi7x01234567), vget_high_s16(vk7x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16() 224 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi8x01234567), vget_high_s16(vk8x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16() [all …]
|
D | up8x25-minmax-fp32-neonv8-mul16.c | 169 int32x4_t vacc4567 = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16() local 176 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi0x01234567), vget_high_s16(vk0x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16() 182 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi1x01234567), vget_high_s16(vk1x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16() 188 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi2x01234567), vget_high_s16(vk2x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16() 194 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi3x01234567), vget_high_s16(vk3x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16() 200 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi4x01234567), vget_high_s16(vk4x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16() 206 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi5x01234567), vget_high_s16(vk5x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16() 212 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi6x01234567), vget_high_s16(vk6x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16() 218 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi7x01234567), vget_high_s16(vk7x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16() 224 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi8x01234567), vget_high_s16(vk8x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16() [all …]
|
D | up8x9-minmax-rndnu-neon-mul8-ld64.c | 90 int32x4_t vacc4567 = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8_ld64() local 98 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8_ld64() 105 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8_ld64() 112 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8_ld64() 119 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8_ld64() 126 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8_ld64() 133 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8_ld64() 140 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8_ld64() 147 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8_ld64() 154 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8_ld64() [all …]
|
D | up8x9-minmax-rndnu-neon-mul16.c | 90 int32x4_t vacc4567 = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16() local 97 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi0x01234567), vget_high_s16(vk0x01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16() 103 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi1x01234567), vget_high_s16(vk1x01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16() 109 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi2x01234567), vget_high_s16(vk2x01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16() 115 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi3x01234567), vget_high_s16(vk3x01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16() 121 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi4x01234567), vget_high_s16(vk4x01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16() 127 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi5x01234567), vget_high_s16(vk5x01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16() 133 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi6x01234567), vget_high_s16(vk6x01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16() 139 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi7x01234567), vget_high_s16(vk7x01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16() 145 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi8x01234567), vget_high_s16(vk8x01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16() [all …]
|
D | up16x25-minmax-fp32-wasmsimd-mul16.c | 164 v128_t vacc4567 = wasm_v128_load((const void*) ((uintptr_t) w + 4 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16() local 179 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16() 193 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16() 207 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16() 221 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16() 235 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16() 249 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16() 263 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16() 277 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16() 291 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16() [all …]
|
/external/XNNPACK/src/qc8-dwconv/gen/ |
D | up8x25-minmax-fp32-wasmsimd-mul16.c | 164 v128_t vacc4567 = wasm_v128_load((const void*) ((uintptr_t) w + 4 * sizeof(int32_t))); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16() local 174 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16() 183 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16() 192 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16() 201 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16() 210 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16() 219 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16() 228 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16() 237 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16() 246 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16() [all …]
|
D | up8x25-minmax-fp32-neon-mul8-ld64.c | 168 int32x4_t vacc4567 = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64() local 176 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64() 183 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64() 190 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64() 197 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64() 204 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64() 211 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64() 218 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64() 225 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64() 232 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64() [all …]
|
D | up8x9-minmax-fp32-wasmsimd-mul16.c | 84 v128_t vacc4567 = wasm_v128_load((const void*) ((uintptr_t) w + 4 * sizeof(int32_t))); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16() local 94 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16() 103 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16() 112 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16() 121 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16() 130 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16() 139 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16() 148 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16() 157 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16() 166 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16() [all …]
|
D | up8x25-minmax-fp32-neonv8-mul16.c | 168 int32x4_t vacc4567 = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16() local 175 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi0x01234567), vget_high_s16(vk0x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16() 181 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi1x01234567), vget_high_s16(vk1x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16() 187 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi2x01234567), vget_high_s16(vk2x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16() 193 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi3x01234567), vget_high_s16(vk3x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16() 199 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi4x01234567), vget_high_s16(vk4x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16() 205 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi5x01234567), vget_high_s16(vk5x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16() 211 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi6x01234567), vget_high_s16(vk6x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16() 217 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi7x01234567), vget_high_s16(vk7x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16() 223 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi8x01234567), vget_high_s16(vk8x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16() [all …]
|
D | up8x25-minmax-fp32-neonv8-mul8-ld64.c | 168 int32x4_t vacc4567 = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64() local 176 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64() 183 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64() 190 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64() 197 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64() 204 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64() 211 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64() 218 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64() 225 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64() 232 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64() [all …]
|
D | up8x25-minmax-fp32-neon-mul16.c | 168 int32x4_t vacc4567 = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16() local 175 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi0x01234567), vget_high_s16(vk0x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16() 181 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi1x01234567), vget_high_s16(vk1x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16() 187 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi2x01234567), vget_high_s16(vk2x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16() 193 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi3x01234567), vget_high_s16(vk3x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16() 199 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi4x01234567), vget_high_s16(vk4x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16() 205 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi5x01234567), vget_high_s16(vk5x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16() 211 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi6x01234567), vget_high_s16(vk6x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16() 217 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi7x01234567), vget_high_s16(vk7x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16() 223 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi8x01234567), vget_high_s16(vk8x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16() [all …]
|
D | up16x25-minmax-fp32-wasmsimd-mul16.c | 164 v128_t vacc4567 = wasm_v128_load((const void*) ((uintptr_t) w + 4 * sizeof(int32_t))); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16() local 179 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16() 193 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16() 207 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16() 221 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16() 235 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16() 249 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16() 263 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16() 277 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16() 291 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16() [all …]
|
D | up8x9-minmax-fp32-neon-mul8-ld64.c | 88 int32x4_t vacc4567 = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64() local 96 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64() 103 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64() 110 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64() 117 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64() 124 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64() 131 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64() 138 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64() 145 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64() 152 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64() [all …]
|
/external/XNNPACK/src/qu8-dwconv/gen/ |
D | up8x25-minmax-fp32-wasmsimd-mul16.c | 165 v128_t vacc4567 = wasm_v128_load((const void*) ((uintptr_t) w + 4 * sizeof(int32_t))); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16() local 175 vacc4567 = wasm_i32x4_add(vacc4567, wasm_u32x4_extend_high_u16x8(vprod01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16() 185 vacc4567 = wasm_i32x4_add(vacc4567, wasm_u32x4_extend_high_u16x8(vprod01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16() 195 vacc4567 = wasm_i32x4_add(vacc4567, wasm_u32x4_extend_high_u16x8(vprod01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16() 205 vacc4567 = wasm_i32x4_add(vacc4567, wasm_u32x4_extend_high_u16x8(vprod01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16() 215 vacc4567 = wasm_i32x4_add(vacc4567, wasm_u32x4_extend_high_u16x8(vprod01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16() 225 vacc4567 = wasm_i32x4_add(vacc4567, wasm_u32x4_extend_high_u16x8(vprod01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16() 235 vacc4567 = wasm_i32x4_add(vacc4567, wasm_u32x4_extend_high_u16x8(vprod01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16() 245 vacc4567 = wasm_i32x4_add(vacc4567, wasm_u32x4_extend_high_u16x8(vprod01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16() 255 vacc4567 = wasm_i32x4_add(vacc4567, wasm_u32x4_extend_high_u16x8(vprod01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16() [all …]
|
D | up8x9-minmax-fp32-wasmsimd-mul16.c | 85 v128_t vacc4567 = wasm_v128_load((const void*) ((uintptr_t) w + 4 * sizeof(int32_t))); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16() local 95 vacc4567 = wasm_i32x4_add(vacc4567, wasm_u32x4_extend_high_u16x8(vprod01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16() 105 vacc4567 = wasm_i32x4_add(vacc4567, wasm_u32x4_extend_high_u16x8(vprod01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16() 115 vacc4567 = wasm_i32x4_add(vacc4567, wasm_u32x4_extend_high_u16x8(vprod01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16() 125 vacc4567 = wasm_i32x4_add(vacc4567, wasm_u32x4_extend_high_u16x8(vprod01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16() 135 vacc4567 = wasm_i32x4_add(vacc4567, wasm_u32x4_extend_high_u16x8(vprod01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16() 145 vacc4567 = wasm_i32x4_add(vacc4567, wasm_u32x4_extend_high_u16x8(vprod01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16() 155 vacc4567 = wasm_i32x4_add(vacc4567, wasm_u32x4_extend_high_u16x8(vprod01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16() 165 vacc4567 = wasm_i32x4_add(vacc4567, wasm_u32x4_extend_high_u16x8(vprod01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16() 175 vacc4567 = wasm_i32x4_add(vacc4567, wasm_u32x4_extend_high_u16x8(vprod01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16() [all …]
|
D | up8x25-minmax-rndnu-neon-mul8.c | 173 int32x4_t vacc4567 = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8() local 182 …vacc4567 = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vacc4567), vget_high_u16(vprod012… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8() 190 …vacc4567 = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vacc4567), vget_high_u16(vprod012… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8() 198 …vacc4567 = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vacc4567), vget_high_u16(vprod012… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8() 206 …vacc4567 = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vacc4567), vget_high_u16(vprod012… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8() 214 …vacc4567 = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vacc4567), vget_high_u16(vprod012… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8() 222 …vacc4567 = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vacc4567), vget_high_u16(vprod012… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8() 230 …vacc4567 = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vacc4567), vget_high_u16(vprod012… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8() 238 …vacc4567 = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vacc4567), vget_high_u16(vprod012… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8() 246 …vacc4567 = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vacc4567), vget_high_u16(vprod012… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8() [all …]
|
D | up8x25-minmax-rndnu-neon-mul16.c | 171 int32x4_t vacc4567 = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16() local 178 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi0x01234567), vget_high_s16(vk0x01234567)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16() 184 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi1x01234567), vget_high_s16(vk1x01234567)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16() 190 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi2x01234567), vget_high_s16(vk2x01234567)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16() 196 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi3x01234567), vget_high_s16(vk3x01234567)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16() 202 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi4x01234567), vget_high_s16(vk4x01234567)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16() 208 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi5x01234567), vget_high_s16(vk5x01234567)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16() 214 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi6x01234567), vget_high_s16(vk6x01234567)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16() 220 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi7x01234567), vget_high_s16(vk7x01234567)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16() 226 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi8x01234567), vget_high_s16(vk8x01234567)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16() [all …]
|
D | up8x25-minmax-fp32-neonv8-mul16.c | 170 int32x4_t vacc4567 = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16() local 177 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi0x01234567), vget_high_s16(vk0x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16() 183 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi1x01234567), vget_high_s16(vk1x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16() 189 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi2x01234567), vget_high_s16(vk2x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16() 195 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi3x01234567), vget_high_s16(vk3x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16() 201 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi4x01234567), vget_high_s16(vk4x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16() 207 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi5x01234567), vget_high_s16(vk5x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16() 213 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi6x01234567), vget_high_s16(vk6x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16() 219 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi7x01234567), vget_high_s16(vk7x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16() 225 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi8x01234567), vget_high_s16(vk8x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16() [all …]
|
D | up8x25-minmax-fp32-neon-mul16.c | 170 int32x4_t vacc4567 = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16() local 177 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi0x01234567), vget_high_s16(vk0x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16() 183 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi1x01234567), vget_high_s16(vk1x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16() 189 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi2x01234567), vget_high_s16(vk2x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16() 195 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi3x01234567), vget_high_s16(vk3x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16() 201 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi4x01234567), vget_high_s16(vk4x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16() 207 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi5x01234567), vget_high_s16(vk5x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16() 213 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi6x01234567), vget_high_s16(vk6x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16() 219 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi7x01234567), vget_high_s16(vk7x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16() 225 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi8x01234567), vget_high_s16(vk8x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16() [all …]
|
D | up8x9-minmax-rndnu-neon-mul16.c | 91 int32x4_t vacc4567 = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16() local 98 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi0x01234567), vget_high_s16(vk0x01234567)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16() 104 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi1x01234567), vget_high_s16(vk1x01234567)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16() 110 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi2x01234567), vget_high_s16(vk2x01234567)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16() 116 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi3x01234567), vget_high_s16(vk3x01234567)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16() 122 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi4x01234567), vget_high_s16(vk4x01234567)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16() 128 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi5x01234567), vget_high_s16(vk5x01234567)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16() 134 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi6x01234567), vget_high_s16(vk6x01234567)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16() 140 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi7x01234567), vget_high_s16(vk7x01234567)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16() 146 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi8x01234567), vget_high_s16(vk8x01234567)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16() [all …]
|
D | up8x9-minmax-rndnu-neon-mul8.c | 93 int32x4_t vacc4567 = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8() local 102 …vacc4567 = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vacc4567), vget_high_u16(vprod012… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8() 110 …vacc4567 = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vacc4567), vget_high_u16(vprod012… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8() 118 …vacc4567 = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vacc4567), vget_high_u16(vprod012… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8() 126 …vacc4567 = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vacc4567), vget_high_u16(vprod012… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8() 134 …vacc4567 = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vacc4567), vget_high_u16(vprod012… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8() 142 …vacc4567 = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vacc4567), vget_high_u16(vprod012… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8() 150 …vacc4567 = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vacc4567), vget_high_u16(vprod012… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8() 158 …vacc4567 = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vacc4567), vget_high_u16(vprod012… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8() 166 …vacc4567 = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vacc4567), vget_high_u16(vprod012… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8() [all …]
|