Home
last modified time | relevance | path

Searched refs:vacc4567 (Results 1 – 25 of 761) sorted by relevance

12345678910>>...31

/external/XNNPACK/src/qs8-dwconv/gen/
Dup8x25-minmax-fp32-wasmsimd-mul16.c164 v128_t vacc4567 = wasm_v128_load((const void*) ((uintptr_t) w + 4 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16() local
174 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16()
183 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16()
192 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16()
201 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16()
210 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16()
219 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16()
228 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16()
237 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16()
246 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16()
[all …]
Dup8x9-minmax-fp32-wasmsimd-mul16.c84 v128_t vacc4567 = wasm_v128_load((const void*) ((uintptr_t) w + 4 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16() local
94 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16()
103 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16()
112 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16()
121 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16()
130 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16()
139 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16()
148 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16()
157 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16()
166 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16()
[all …]
Dup8x25-minmax-rndnu-neon-mul16.c170 int32x4_t vacc4567 = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16() local
177 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi0x01234567), vget_high_s16(vk0x01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16()
183 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi1x01234567), vget_high_s16(vk1x01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16()
189 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi2x01234567), vget_high_s16(vk2x01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16()
195 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi3x01234567), vget_high_s16(vk3x01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16()
201 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi4x01234567), vget_high_s16(vk4x01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16()
207 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi5x01234567), vget_high_s16(vk5x01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16()
213 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi6x01234567), vget_high_s16(vk6x01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16()
219 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi7x01234567), vget_high_s16(vk7x01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16()
225 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi8x01234567), vget_high_s16(vk8x01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16()
[all …]
Dup8x25-minmax-rndnu-neon-mul8-ld64.c170 int32x4_t vacc4567 = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8_ld64() local
178 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8_ld64()
185 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8_ld64()
192 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8_ld64()
199 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8_ld64()
206 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8_ld64()
213 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8_ld64()
220 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8_ld64()
227 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8_ld64()
234 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8_ld64()
[all …]
Dup8x25-minmax-fp32-neon-mul16.c169 int32x4_t vacc4567 = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16() local
176 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi0x01234567), vget_high_s16(vk0x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16()
182 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi1x01234567), vget_high_s16(vk1x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16()
188 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi2x01234567), vget_high_s16(vk2x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16()
194 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi3x01234567), vget_high_s16(vk3x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16()
200 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi4x01234567), vget_high_s16(vk4x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16()
206 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi5x01234567), vget_high_s16(vk5x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16()
212 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi6x01234567), vget_high_s16(vk6x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16()
218 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi7x01234567), vget_high_s16(vk7x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16()
224 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi8x01234567), vget_high_s16(vk8x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16()
[all …]
Dup8x25-minmax-fp32-neonv8-mul16.c169 int32x4_t vacc4567 = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16() local
176 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi0x01234567), vget_high_s16(vk0x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16()
182 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi1x01234567), vget_high_s16(vk1x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16()
188 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi2x01234567), vget_high_s16(vk2x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16()
194 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi3x01234567), vget_high_s16(vk3x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16()
200 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi4x01234567), vget_high_s16(vk4x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16()
206 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi5x01234567), vget_high_s16(vk5x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16()
212 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi6x01234567), vget_high_s16(vk6x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16()
218 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi7x01234567), vget_high_s16(vk7x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16()
224 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi8x01234567), vget_high_s16(vk8x01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16()
[all …]
Dup8x9-minmax-rndnu-neon-mul8-ld64.c90 int32x4_t vacc4567 = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8_ld64() local
98 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8_ld64()
105 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8_ld64()
112 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8_ld64()
119 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8_ld64()
126 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8_ld64()
133 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8_ld64()
140 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8_ld64()
147 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8_ld64()
154 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8_ld64()
[all …]
Dup8x9-minmax-rndnu-neon-mul16.c90 int32x4_t vacc4567 = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16() local
97 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi0x01234567), vget_high_s16(vk0x01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16()
103 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi1x01234567), vget_high_s16(vk1x01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16()
109 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi2x01234567), vget_high_s16(vk2x01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16()
115 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi3x01234567), vget_high_s16(vk3x01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16()
121 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi4x01234567), vget_high_s16(vk4x01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16()
127 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi5x01234567), vget_high_s16(vk5x01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16()
133 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi6x01234567), vget_high_s16(vk6x01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16()
139 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi7x01234567), vget_high_s16(vk7x01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16()
145 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi8x01234567), vget_high_s16(vk8x01234567)); in xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16()
[all …]
Dup16x25-minmax-fp32-wasmsimd-mul16.c164 v128_t vacc4567 = wasm_v128_load((const void*) ((uintptr_t) w + 4 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16() local
179 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16()
193 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16()
207 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16()
221 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16()
235 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16()
249 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16()
263 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16()
277 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16()
291 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16()
[all …]
/external/XNNPACK/src/qc8-dwconv/gen/
Dup8x25-minmax-fp32-wasmsimd-mul16.c164 v128_t vacc4567 = wasm_v128_load((const void*) ((uintptr_t) w + 4 * sizeof(int32_t))); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16() local
174 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16()
183 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16()
192 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16()
201 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16()
210 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16()
219 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16()
228 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16()
237 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16()
246 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16()
[all …]
Dup8x25-minmax-fp32-neon-mul8-ld64.c168 int32x4_t vacc4567 = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64() local
176 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64()
183 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64()
190 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64()
197 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64()
204 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64()
211 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64()
218 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64()
225 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64()
232 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64()
[all …]
Dup8x9-minmax-fp32-wasmsimd-mul16.c84 v128_t vacc4567 = wasm_v128_load((const void*) ((uintptr_t) w + 4 * sizeof(int32_t))); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16() local
94 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16()
103 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16()
112 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16()
121 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16()
130 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16()
139 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16()
148 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16()
157 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16()
166 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16()
[all …]
Dup8x25-minmax-fp32-neonv8-mul16.c168 int32x4_t vacc4567 = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16() local
175 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi0x01234567), vget_high_s16(vk0x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16()
181 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi1x01234567), vget_high_s16(vk1x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16()
187 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi2x01234567), vget_high_s16(vk2x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16()
193 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi3x01234567), vget_high_s16(vk3x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16()
199 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi4x01234567), vget_high_s16(vk4x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16()
205 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi5x01234567), vget_high_s16(vk5x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16()
211 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi6x01234567), vget_high_s16(vk6x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16()
217 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi7x01234567), vget_high_s16(vk7x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16()
223 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi8x01234567), vget_high_s16(vk8x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16()
[all …]
Dup8x25-minmax-fp32-neonv8-mul8-ld64.c168 int32x4_t vacc4567 = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64() local
176 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64()
183 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64()
190 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64()
197 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64()
204 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64()
211 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64()
218 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64()
225 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64()
232 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul8_ld64()
[all …]
Dup8x25-minmax-fp32-neon-mul16.c168 int32x4_t vacc4567 = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16() local
175 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi0x01234567), vget_high_s16(vk0x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16()
181 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi1x01234567), vget_high_s16(vk1x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16()
187 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi2x01234567), vget_high_s16(vk2x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16()
193 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi3x01234567), vget_high_s16(vk3x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16()
199 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi4x01234567), vget_high_s16(vk4x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16()
205 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi5x01234567), vget_high_s16(vk5x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16()
211 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi6x01234567), vget_high_s16(vk6x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16()
217 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi7x01234567), vget_high_s16(vk7x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16()
223 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi8x01234567), vget_high_s16(vk8x01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16()
[all …]
Dup16x25-minmax-fp32-wasmsimd-mul16.c164 v128_t vacc4567 = wasm_v128_load((const void*) ((uintptr_t) w + 4 * sizeof(int32_t))); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16() local
179 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16()
193 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16()
207 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16()
221 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16()
235 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16()
249 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16()
263 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16()
277 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16()
291 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_extend_high_i16x8(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16()
[all …]
Dup8x9-minmax-fp32-neon-mul8-ld64.c88 int32x4_t vacc4567 = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64() local
96 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64()
103 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64()
110 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64()
117 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64()
124 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64()
131 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64()
138 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64()
145 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64()
152 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vprod01234567)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul8_ld64()
[all …]
/external/XNNPACK/src/qu8-dwconv/gen/
Dup8x25-minmax-fp32-wasmsimd-mul16.c165 v128_t vacc4567 = wasm_v128_load((const void*) ((uintptr_t) w + 4 * sizeof(int32_t))); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16() local
175 vacc4567 = wasm_i32x4_add(vacc4567, wasm_u32x4_extend_high_u16x8(vprod01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16()
185 vacc4567 = wasm_i32x4_add(vacc4567, wasm_u32x4_extend_high_u16x8(vprod01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16()
195 vacc4567 = wasm_i32x4_add(vacc4567, wasm_u32x4_extend_high_u16x8(vprod01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16()
205 vacc4567 = wasm_i32x4_add(vacc4567, wasm_u32x4_extend_high_u16x8(vprod01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16()
215 vacc4567 = wasm_i32x4_add(vacc4567, wasm_u32x4_extend_high_u16x8(vprod01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16()
225 vacc4567 = wasm_i32x4_add(vacc4567, wasm_u32x4_extend_high_u16x8(vprod01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16()
235 vacc4567 = wasm_i32x4_add(vacc4567, wasm_u32x4_extend_high_u16x8(vprod01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16()
245 vacc4567 = wasm_i32x4_add(vacc4567, wasm_u32x4_extend_high_u16x8(vprod01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16()
255 vacc4567 = wasm_i32x4_add(vacc4567, wasm_u32x4_extend_high_u16x8(vprod01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16()
[all …]
Dup8x9-minmax-fp32-wasmsimd-mul16.c85 v128_t vacc4567 = wasm_v128_load((const void*) ((uintptr_t) w + 4 * sizeof(int32_t))); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16() local
95 vacc4567 = wasm_i32x4_add(vacc4567, wasm_u32x4_extend_high_u16x8(vprod01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16()
105 vacc4567 = wasm_i32x4_add(vacc4567, wasm_u32x4_extend_high_u16x8(vprod01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16()
115 vacc4567 = wasm_i32x4_add(vacc4567, wasm_u32x4_extend_high_u16x8(vprod01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16()
125 vacc4567 = wasm_i32x4_add(vacc4567, wasm_u32x4_extend_high_u16x8(vprod01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16()
135 vacc4567 = wasm_i32x4_add(vacc4567, wasm_u32x4_extend_high_u16x8(vprod01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16()
145 vacc4567 = wasm_i32x4_add(vacc4567, wasm_u32x4_extend_high_u16x8(vprod01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16()
155 vacc4567 = wasm_i32x4_add(vacc4567, wasm_u32x4_extend_high_u16x8(vprod01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16()
165 vacc4567 = wasm_i32x4_add(vacc4567, wasm_u32x4_extend_high_u16x8(vprod01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16()
175 vacc4567 = wasm_i32x4_add(vacc4567, wasm_u32x4_extend_high_u16x8(vprod01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16()
[all …]
Dup8x25-minmax-rndnu-neon-mul8.c173 int32x4_t vacc4567 = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8() local
182vacc4567 = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vacc4567), vget_high_u16(vprod012… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8()
190vacc4567 = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vacc4567), vget_high_u16(vprod012… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8()
198vacc4567 = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vacc4567), vget_high_u16(vprod012… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8()
206vacc4567 = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vacc4567), vget_high_u16(vprod012… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8()
214vacc4567 = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vacc4567), vget_high_u16(vprod012… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8()
222vacc4567 = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vacc4567), vget_high_u16(vprod012… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8()
230vacc4567 = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vacc4567), vget_high_u16(vprod012… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8()
238vacc4567 = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vacc4567), vget_high_u16(vprod012… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8()
246vacc4567 = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vacc4567), vget_high_u16(vprod012… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8()
[all …]
Dup8x25-minmax-rndnu-neon-mul16.c171 int32x4_t vacc4567 = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16() local
178 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi0x01234567), vget_high_s16(vk0x01234567)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16()
184 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi1x01234567), vget_high_s16(vk1x01234567)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16()
190 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi2x01234567), vget_high_s16(vk2x01234567)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16()
196 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi3x01234567), vget_high_s16(vk3x01234567)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16()
202 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi4x01234567), vget_high_s16(vk4x01234567)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16()
208 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi5x01234567), vget_high_s16(vk5x01234567)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16()
214 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi6x01234567), vget_high_s16(vk6x01234567)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16()
220 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi7x01234567), vget_high_s16(vk7x01234567)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16()
226 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi8x01234567), vget_high_s16(vk8x01234567)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16()
[all …]
Dup8x25-minmax-fp32-neonv8-mul16.c170 int32x4_t vacc4567 = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16() local
177 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi0x01234567), vget_high_s16(vk0x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16()
183 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi1x01234567), vget_high_s16(vk1x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16()
189 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi2x01234567), vget_high_s16(vk2x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16()
195 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi3x01234567), vget_high_s16(vk3x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16()
201 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi4x01234567), vget_high_s16(vk4x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16()
207 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi5x01234567), vget_high_s16(vk5x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16()
213 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi6x01234567), vget_high_s16(vk6x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16()
219 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi7x01234567), vget_high_s16(vk7x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16()
225 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi8x01234567), vget_high_s16(vk8x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16()
[all …]
Dup8x25-minmax-fp32-neon-mul16.c170 int32x4_t vacc4567 = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16() local
177 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi0x01234567), vget_high_s16(vk0x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16()
183 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi1x01234567), vget_high_s16(vk1x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16()
189 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi2x01234567), vget_high_s16(vk2x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16()
195 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi3x01234567), vget_high_s16(vk3x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16()
201 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi4x01234567), vget_high_s16(vk4x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16()
207 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi5x01234567), vget_high_s16(vk5x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16()
213 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi6x01234567), vget_high_s16(vk6x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16()
219 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi7x01234567), vget_high_s16(vk7x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16()
225 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi8x01234567), vget_high_s16(vk8x01234567)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16()
[all …]
Dup8x9-minmax-rndnu-neon-mul16.c91 int32x4_t vacc4567 = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16() local
98 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi0x01234567), vget_high_s16(vk0x01234567)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16()
104 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi1x01234567), vget_high_s16(vk1x01234567)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16()
110 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi2x01234567), vget_high_s16(vk2x01234567)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16()
116 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi3x01234567), vget_high_s16(vk3x01234567)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16()
122 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi4x01234567), vget_high_s16(vk4x01234567)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16()
128 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi5x01234567), vget_high_s16(vk5x01234567)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16()
134 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi6x01234567), vget_high_s16(vk6x01234567)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16()
140 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi7x01234567), vget_high_s16(vk7x01234567)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16()
146 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi8x01234567), vget_high_s16(vk8x01234567)); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16()
[all …]
Dup8x9-minmax-rndnu-neon-mul8.c93 int32x4_t vacc4567 = vld1q_s32(w); w = (const void*) ((const int32_t*) w + 4); in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8() local
102vacc4567 = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vacc4567), vget_high_u16(vprod012… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8()
110vacc4567 = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vacc4567), vget_high_u16(vprod012… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8()
118vacc4567 = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vacc4567), vget_high_u16(vprod012… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8()
126vacc4567 = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vacc4567), vget_high_u16(vprod012… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8()
134vacc4567 = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vacc4567), vget_high_u16(vprod012… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8()
142vacc4567 = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vacc4567), vget_high_u16(vprod012… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8()
150vacc4567 = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vacc4567), vget_high_u16(vprod012… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8()
158vacc4567 = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vacc4567), vget_high_u16(vprod012… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8()
166vacc4567 = vreinterpretq_s32_u32(vaddw_u16(vreinterpretq_u32_s32(vacc4567), vget_high_u16(vprod012… in xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8()
[all …]

12345678910>>...31