Home
last modified time | relevance | path

Searched refs:vprod2x2 (Results 1 – 25 of 29) sorted by relevance

12

/external/XNNPACK/src/qs8-gemm/gen/
D3x8c8-minmax-neon-mlal-padal.c123 int16x8_t vprod2x2 = vmull_s8(vb2x0, va2x0); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
126 vprod2x2 = vmlal_s8(vprod2x2, vb2x1, va2x1); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
129 vacc2x2 = vpadalq_s16(vacc2x2, vprod2x2); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
207 const int16x8_t vprod2x2 = vmull_s8(vb2, va2); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
210 vacc2x2 = vpadalq_s16(vacc2x2, vprod2x2); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
D3x4c8-xw-minmax-wasmsimd.c110 const v128_t vprod2x2 = wasm_i16x8_mul(vxa2, vxb2); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd() local
111 vacc2x2 = wasm_i32x4_add(vacc2x2, wasm_i32x4_widen_low_i16x8(vprod2x2)); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()
112 vacc2x2 = wasm_i32x4_add(vacc2x2, wasm_i32x4_widen_high_i16x8(vprod2x2)); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()
D3x4c8-minmax-wasmsimd-ld128.c112 const v128_t vprod2x2 = wasm_i16x8_mul(vxb2, vxa2); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128() local
113 vacc2x2 = wasm_i32x4_add(vacc2x2, wasm_i32x4_widen_low_i16x8(vprod2x2)); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
123 vacc2x2 = wasm_i32x4_add(vacc2x2, wasm_i32x4_widen_high_i16x8(vprod2x2)); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
D3x4c8-minmax-wasmsimd-ld64.c110 const v128_t vprod2x2 = wasm_i16x8_mul(vxa2, vxb2); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64() local
111 vacc2x2 = wasm_i32x4_add(vacc2x2, wasm_i32x4_widen_low_i16x8(vprod2x2)); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
112 vacc2x2 = wasm_i32x4_add(vacc2x2, wasm_i32x4_widen_high_i16x8(vprod2x2)); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
D3x8c16-minmax-neon-mlal-padal.c117 int16x8_t vprod2x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
120 vprod2x2 = vmlal_s8(vprod2x2, vget_high_s8(vb2), vget_high_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
123 vacc2x2 = vpadalq_s16(vacc2x2, vprod2x2); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
D4x8c8-minmax-neon-mlal-padal.c145 int16x8_t vprod2x2 = vmull_s8(vb2x0, va2x0); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
149 vprod2x2 = vmlal_s8(vprod2x2, vb2x1, va2x1); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
153 vacc2x2 = vpadalq_s16(vacc2x2, vprod2x2); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
252 const int16x8_t vprod2x2 = vmull_s8(vb2, va2); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
256 vacc2x2 = vpadalq_s16(vacc2x2, vprod2x2); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
D4x8c16-minmax-neon-mlal-padal.c138 int16x8_t vprod2x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
142 vprod2x2 = vmlal_s8(vprod2x2, vget_high_s8(vb2), vget_high_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
146 vacc2x2 = vpadalq_s16(vacc2x2, vprod2x2); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
D3x16c8-minmax-neon-mlal-padal.c155 int16x8_t vprod2x2 = vmull_s8(vb2x0, va2x0); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
158 vprod2x2 = vmlal_s8(vprod2x2, vb2x1, va2x1); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
161 vacc2x2 = vpadalq_s16(vacc2x2, vprod2x2); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
319 const int16x8_t vprod2x2 = vmull_s8(vb2, va2); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
322 vacc2x2 = vpadalq_s16(vacc2x2, vprod2x2); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
D3x16c16-minmax-neon-mlal-padal.c149 int16x8_t vprod2x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
152 vprod2x2 = vmlal_s8(vprod2x2, vget_high_s8(vb2), vget_high_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
155 vacc2x2 = vpadalq_s16(vacc2x2, vprod2x2); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
D4x16c8-minmax-neon-mlal-padal.c185 int16x8_t vprod2x2 = vmull_s8(vb2x0, va2x0); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
189 vprod2x2 = vmlal_s8(vprod2x2, vb2x1, va2x1); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
193 vacc2x2 = vpadalq_s16(vacc2x2, vprod2x2); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
396 const int16x8_t vprod2x2 = vmull_s8(vb2, va2); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
400 vacc2x2 = vpadalq_s16(vacc2x2, vprod2x2); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
D3x8c8-minmax-neon-mull-padal.c106 const int16x8_t vprod2x2 = vmull_s8(vb2, va2); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal() local
109 vacc2x2 = vpadalq_s16(vacc2x2, vprod2x2); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
D4x8c8-minmax-neon-mull-padal.c125 const int16x8_t vprod2x2 = vmull_s8(vb2, va2); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal() local
129 vacc2x2 = vpadalq_s16(vacc2x2, vprod2x2); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
D4x16c16-minmax-neon-mlal-padal.c178 int16x8_t vprod2x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local
182 vprod2x2 = vmlal_s8(vprod2x2, vget_high_s8(vb2), vget_high_s8(va2)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
186 vacc2x2 = vpadalq_s16(vacc2x2, vprod2x2); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
/external/XNNPACK/src/qs8-igemm/gen/
D3x4c8-minmax-wasmsimd-ld128.c127 const v128_t vprod2x2 = wasm_i16x8_mul(vxb2, vxa2); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128() local
128 vacc2x2 = wasm_i32x4_add(vacc2x2, wasm_i32x4_widen_low_i16x8(vprod2x2)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
138 vacc2x2 = wasm_i32x4_add(vacc2x2, wasm_i32x4_widen_high_i16x8(vprod2x2)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
D3x4c8-minmax-wasmsimd-ld64.c125 const v128_t vprod2x2 = wasm_i16x8_mul(vxa2, vxb2); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64() local
126 vacc2x2 = wasm_i32x4_add(vacc2x2, wasm_i32x4_widen_low_i16x8(vprod2x2)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
127 vacc2x2 = wasm_i32x4_add(vacc2x2, wasm_i32x4_widen_high_i16x8(vprod2x2)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
D3x8c8-minmax-neon-mlal-padal.c138 int16x8_t vprod2x2 = vmull_s8(vb2x0, va2x0); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
141 vprod2x2 = vmlal_s8(vprod2x2, vb2x1, va2x1); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
144 vacc2x2 = vpadalq_s16(vacc2x2, vprod2x2); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
222 const int16x8_t vprod2x2 = vmull_s8(vb2, va2); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
225 vacc2x2 = vpadalq_s16(vacc2x2, vprod2x2); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
D3x8c16-minmax-neon-mlal-padal.c132 int16x8_t vprod2x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
135 vprod2x2 = vmlal_s8(vprod2x2, vget_high_s8(vb2), vget_high_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
138 vacc2x2 = vpadalq_s16(vacc2x2, vprod2x2); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
D4x8c8-minmax-neon-mlal-padal.c162 int16x8_t vprod2x2 = vmull_s8(vb2x0, va2x0); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
166 vprod2x2 = vmlal_s8(vprod2x2, vb2x1, va2x1); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
170 vacc2x2 = vpadalq_s16(vacc2x2, vprod2x2); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
269 const int16x8_t vprod2x2 = vmull_s8(vb2, va2); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
273 vacc2x2 = vpadalq_s16(vacc2x2, vprod2x2); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
D4x8c16-minmax-neon-mlal-padal.c155 int16x8_t vprod2x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
159 vprod2x2 = vmlal_s8(vprod2x2, vget_high_s8(vb2), vget_high_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
163 vacc2x2 = vpadalq_s16(vacc2x2, vprod2x2); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
D3x16c8-minmax-neon-mlal-padal.c170 int16x8_t vprod2x2 = vmull_s8(vb2x0, va2x0); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
173 vprod2x2 = vmlal_s8(vprod2x2, vb2x1, va2x1); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
176 vacc2x2 = vpadalq_s16(vacc2x2, vprod2x2); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
334 const int16x8_t vprod2x2 = vmull_s8(vb2, va2); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
337 vacc2x2 = vpadalq_s16(vacc2x2, vprod2x2); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
D3x16c16-minmax-neon-mlal-padal.c164 int16x8_t vprod2x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
167 vprod2x2 = vmlal_s8(vprod2x2, vget_high_s8(vb2), vget_high_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
170 vacc2x2 = vpadalq_s16(vacc2x2, vprod2x2); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
D4x16c8-minmax-neon-mlal-padal.c202 int16x8_t vprod2x2 = vmull_s8(vb2x0, va2x0); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
206 vprod2x2 = vmlal_s8(vprod2x2, vb2x1, va2x1); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
210 vacc2x2 = vpadalq_s16(vacc2x2, vprod2x2); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
413 const int16x8_t vprod2x2 = vmull_s8(vb2, va2); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
417 vacc2x2 = vpadalq_s16(vacc2x2, vprod2x2); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
D3x8c8-minmax-neon-mull-padal.c121 const int16x8_t vprod2x2 = vmull_s8(vb2, va2); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal() local
124 vacc2x2 = vpadalq_s16(vacc2x2, vprod2x2); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
D4x8c8-minmax-neon-mull-padal.c142 const int16x8_t vprod2x2 = vmull_s8(vb2, va2); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal() local
146 vacc2x2 = vpadalq_s16(vacc2x2, vprod2x2); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
D4x16c16-minmax-neon-mlal-padal.c195 int16x8_t vprod2x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local
199 vprod2x2 = vmlal_s8(vprod2x2, vget_high_s8(vb2), vget_high_s8(va2)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
203 vacc2x2 = vpadalq_s16(vacc2x2, vprod2x2); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()

12