Searched refs:vprod2x3 (Results 1 – 25 of 29) sorted by relevance


/external/XNNPACK/src/qs8-gemm/gen/
3x8c8-minmax-neon-mlal-padal.c  (in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal)
    133:  int16x8_t vprod2x3 = vmull_s8(vb3x0, va2x0);
    136:  vprod2x3 = vmlal_s8(vprod2x3, vb3x1, va2x1);
    139:  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);
    214:  const int16x8_t vprod2x3 = vmull_s8(vb3, va2);
    217:  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);
3x4c8-xw-minmax-wasmsimd.c  (in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd)
    121:  const v128_t vprod2x3 = wasm_i16x8_mul(vxa2, vxb3);
    122:  vacc2x3 = wasm_i32x4_add(vacc2x3, wasm_i32x4_widen_low_i16x8(vprod2x3));
    123:  vacc2x3 = wasm_i32x4_add(vacc2x3, wasm_i32x4_widen_high_i16x8(vprod2x3));
3x4c8-minmax-wasmsimd-ld128.c  (in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128)
    121:  const v128_t vprod2x3 = wasm_i16x8_mul(vxb3, vxa2);
    122:  vacc2x3 = wasm_i32x4_add(vacc2x3, wasm_i32x4_widen_low_i16x8(vprod2x3));
    127:  vacc2x3 = wasm_i32x4_add(vacc2x3, wasm_i32x4_widen_high_i16x8(vprod2x3));
3x4c8-minmax-wasmsimd-ld64.c  (in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64)
    121:  const v128_t vprod2x3 = wasm_i16x8_mul(vxa2, vxb3);
    122:  vacc2x3 = wasm_i32x4_add(vacc2x3, wasm_i32x4_widen_low_i16x8(vprod2x3));
    123:  vacc2x3 = wasm_i32x4_add(vacc2x3, wasm_i32x4_widen_high_i16x8(vprod2x3));
3x8c16-minmax-neon-mlal-padal.c  (in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal)
    126:  int16x8_t vprod2x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va2));
    129:  vprod2x3 = vmlal_s8(vprod2x3, vget_high_s8(vb3), vget_high_s8(va2));
    132:  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);
4x8c8-minmax-neon-mlal-padal.c  (in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal)
    158:  int16x8_t vprod2x3 = vmull_s8(vb3x0, va2x0);
    162:  vprod2x3 = vmlal_s8(vprod2x3, vb3x1, va2x1);
    166:  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);
    261:  const int16x8_t vprod2x3 = vmull_s8(vb3, va2);
    265:  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);
4x8c16-minmax-neon-mlal-padal.c  (in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal)
    150:  int16x8_t vprod2x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va2));
    154:  vprod2x3 = vmlal_s8(vprod2x3, vget_high_s8(vb3), vget_high_s8(va2));
    158:  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);
3x16c8-minmax-neon-mlal-padal.c  (in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal)
    165:  int16x8_t vprod2x3 = vmull_s8(vb3x0, va2x0);
    168:  vprod2x3 = vmlal_s8(vprod2x3, vb3x1, va2x1);
    171:  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);
    326:  const int16x8_t vprod2x3 = vmull_s8(vb3, va2);
    329:  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);
3x16c16-minmax-neon-mlal-padal.c  (in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal)
    158:  int16x8_t vprod2x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va2));
    161:  vprod2x3 = vmlal_s8(vprod2x3, vget_high_s8(vb3), vget_high_s8(va2));
    164:  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);
4x16c8-minmax-neon-mlal-padal.c  (in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal)
    198:  int16x8_t vprod2x3 = vmull_s8(vb3x0, va2x0);
    202:  vprod2x3 = vmlal_s8(vprod2x3, vb3x1, va2x1);
    206:  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);
    405:  const int16x8_t vprod2x3 = vmull_s8(vb3, va2);
    409:  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);
3x8c8-minmax-neon-mull-padal.c  (in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal)
    113:  const int16x8_t vprod2x3 = vmull_s8(vb3, va2);
    116:  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);
4x8c8-minmax-neon-mull-padal.c  (in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal)
    134:  const int16x8_t vprod2x3 = vmull_s8(vb3, va2);
    138:  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);
4x16c16-minmax-neon-mlal-padal.c  (in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal)
    190:  int16x8_t vprod2x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va2));
    194:  vprod2x3 = vmlal_s8(vprod2x3, vget_high_s8(vb3), vget_high_s8(va2));
    198:  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);
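
The NEON hits above all follow one shape: vmull_s8 forms int16 products for one 8-byte slice of the A and B operands, the mlal/c16 variants fold a second slice into those products with vmlal_s8, and vpadalq_s16 pairwise-adds the int16 products into the int32 accumulator vacc2x3. The following is a minimal standalone sketch of that pattern, not XNNPACK source; the helper name and argument shapes are hypothetical.

#include <arm_neon.h>

/* Hypothetical helper illustrating the vmull_s8 -> vmlal_s8 -> vpadalq_s16
   pattern of the c16 kernels above: one 16-byte slice of a row of A and a
   column of B is folded into four int32 accumulator lanes. */
static inline int32x4_t acc_slice_c16(int32x4_t vacc, int8x16_t va, int8x16_t vb) {
  /* Widening multiply of the low 8 lanes: int8 x int8 -> int16. */
  int16x8_t vprod = vmull_s8(vget_low_s8(vb), vget_low_s8(va));
  /* Multiply-accumulate the high 8 lanes into the same int16 products. */
  vprod = vmlal_s8(vprod, vget_high_s8(vb), vget_high_s8(va));
  /* Pairwise-add adjacent int16 products and accumulate into int32 lanes. */
  return vpadalq_s16(vacc, vprod);
}

The mull-only kernels (the *-mull-padal.c hits) skip the vmlal_s8 step and pass the vmull_s8 result straight to vpadalq_s16, as the remainder loops of the mlal kernels also do.
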
/external/XNNPACK/src/qs8-igemm/gen/
3x4c8-minmax-wasmsimd-ld128.c  (in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128)
    136:  const v128_t vprod2x3 = wasm_i16x8_mul(vxb3, vxa2);
    137:  vacc2x3 = wasm_i32x4_add(vacc2x3, wasm_i32x4_widen_low_i16x8(vprod2x3));
    142:  vacc2x3 = wasm_i32x4_add(vacc2x3, wasm_i32x4_widen_high_i16x8(vprod2x3));
3x4c8-minmax-wasmsimd-ld64.c  (in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64)
    136:  const v128_t vprod2x3 = wasm_i16x8_mul(vxa2, vxb3);
    137:  vacc2x3 = wasm_i32x4_add(vacc2x3, wasm_i32x4_widen_low_i16x8(vprod2x3));
    138:  vacc2x3 = wasm_i32x4_add(vacc2x3, wasm_i32x4_widen_high_i16x8(vprod2x3));
3x8c8-minmax-neon-mlal-padal.c  (in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal)
    148:  int16x8_t vprod2x3 = vmull_s8(vb3x0, va2x0);
    151:  vprod2x3 = vmlal_s8(vprod2x3, vb3x1, va2x1);
    154:  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);
    229:  const int16x8_t vprod2x3 = vmull_s8(vb3, va2);
    232:  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);
3x8c16-minmax-neon-mlal-padal.c  (in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal)
    141:  int16x8_t vprod2x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va2));
    144:  vprod2x3 = vmlal_s8(vprod2x3, vget_high_s8(vb3), vget_high_s8(va2));
    147:  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);
4x8c8-minmax-neon-mlal-padal.c  (in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal)
    175:  int16x8_t vprod2x3 = vmull_s8(vb3x0, va2x0);
    179:  vprod2x3 = vmlal_s8(vprod2x3, vb3x1, va2x1);
    183:  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);
    278:  const int16x8_t vprod2x3 = vmull_s8(vb3, va2);
    282:  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);
4x8c16-minmax-neon-mlal-padal.c  (in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal)
    167:  int16x8_t vprod2x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va2));
    171:  vprod2x3 = vmlal_s8(vprod2x3, vget_high_s8(vb3), vget_high_s8(va2));
    175:  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);
3x16c8-minmax-neon-mlal-padal.c  (in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal)
    180:  int16x8_t vprod2x3 = vmull_s8(vb3x0, va2x0);
    183:  vprod2x3 = vmlal_s8(vprod2x3, vb3x1, va2x1);
    186:  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);
    341:  const int16x8_t vprod2x3 = vmull_s8(vb3, va2);
    344:  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);
3x16c16-minmax-neon-mlal-padal.c  (in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal)
    173:  int16x8_t vprod2x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va2));
    176:  vprod2x3 = vmlal_s8(vprod2x3, vget_high_s8(vb3), vget_high_s8(va2));
    179:  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);
4x16c8-minmax-neon-mlal-padal.c  (in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal)
    215:  int16x8_t vprod2x3 = vmull_s8(vb3x0, va2x0);
    219:  vprod2x3 = vmlal_s8(vprod2x3, vb3x1, va2x1);
    223:  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);
    422:  const int16x8_t vprod2x3 = vmull_s8(vb3, va2);
    426:  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);
3x8c8-minmax-neon-mull-padal.c  (in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal)
    128:  const int16x8_t vprod2x3 = vmull_s8(vb3, va2);
    131:  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);
4x8c8-minmax-neon-mull-padal.c  (in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal)
    151:  const int16x8_t vprod2x3 = vmull_s8(vb3, va2);
    155:  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);
4x16c16-minmax-neon-mlal-padal.c  (in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal)
    207:  int16x8_t vprod2x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va2));
    211:  vprod2x3 = vmlal_s8(vprod2x3, vget_high_s8(vb3), vget_high_s8(va2));
    215:  vacc2x3 = vpadalq_s16(vacc2x3, vprod2x3);
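
The wasmsimd hits use the analogous pattern: wasm_i16x8_mul multiplies sign-extended int8 inputs as int16 lanes (the product of two int8 values always fits in int16), and the low and high halves are then widened to int32 and added into the accumulator. Below is a minimal sketch under the same caveat as before; the widen_* intrinsic names follow the headers this XNNPACK snapshot was built against (newer wasm_simd128.h spells them wasm_i32x4_extend_low_i16x8 / wasm_i32x4_extend_high_i16x8).

#include <wasm_simd128.h>

/* Hypothetical helper mirroring the wasmsimd hits above: vxa and vxb hold
   int16 lanes produced by sign-extending int8 loads, vacc holds four int32
   accumulators. */
static inline v128_t acc_slice_c8(v128_t vacc, v128_t vxa, v128_t vxb) {
  /* int16 x int16 -> int16 products; exact here because the inputs are
     sign-extended int8 values. */
  const v128_t vprod = wasm_i16x8_mul(vxa, vxb);
  /* Widen the low and high int16 halves to int32 and accumulate. */
  vacc = wasm_i32x4_add(vacc, wasm_i32x4_widen_low_i16x8(vprod));
  vacc = wasm_i32x4_add(vacc, wasm_i32x4_widen_high_i16x8(vprod));
  return vacc;
}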
