Searched refs:vprod1x3 (Results 1 – 25 of 46) sorted by relevance

/external/XNNPACK/src/qs8-gemm/gen/
2x8c8-minmax-neon-mlal-padal.c:107 int16x8_t vprod1x3 = vmull_s8(vb3x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
109 vprod1x3 = vmlal_s8(vprod1x3, vb3x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
111 vacc1x3 = vpadalq_s16(vacc1x3, vprod1x3); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
166 const int16x8_t vprod1x3 = vmull_s8(vb3, va1); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
168 vacc1x3 = vpadalq_s16(vacc1x3, vprod1x3); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
2x4c8-xw-minmax-wasmsimd.c:97 const v128_t vprod1x3 = wasm_i16x8_mul(vxa1, vxb3); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd() local
98 vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_low_i16x8(vprod1x3)); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd()
99 vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_high_i16x8(vprod1x3)); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd()
2x4c8-minmax-wasmsimd-ld128.c:98 const v128_t vprod1x3 = wasm_i16x8_mul(vxb3, vxa1); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128() local
99 vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_low_i16x8(vprod1x3)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
103 vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_high_i16x8(vprod1x3)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
2x4c8-minmax-wasmsimd-ld64.c:97 const v128_t vprod1x3 = wasm_i16x8_mul(vxa1, vxb3); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64() local
98 vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_low_i16x8(vprod1x3)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
99 vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_high_i16x8(vprod1x3)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
3x8c8-minmax-neon-mlal-padal.c:132 int16x8_t vprod1x3 = vmull_s8(vb3x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
135 vprod1x3 = vmlal_s8(vprod1x3, vb3x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
138 vacc1x3 = vpadalq_s16(vacc1x3, vprod1x3); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
213 const int16x8_t vprod1x3 = vmull_s8(vb3, va1); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
216 vacc1x3 = vpadalq_s16(vacc1x3, vprod1x3); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
2x8c16-minmax-neon-mlal-padal.c:101 int16x8_t vprod1x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal() local
103 vprod1x3 = vmlal_s8(vprod1x3, vget_high_s8(vb3), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
105 vacc1x3 = vpadalq_s16(vacc1x3, vprod1x3); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
3x4c8-xw-minmax-wasmsimd.c:118 const v128_t vprod1x3 = wasm_i16x8_mul(vxa1, vxb3); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd() local
119 vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_low_i16x8(vprod1x3)); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()
120 vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_high_i16x8(vprod1x3)); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()
3x4c8-minmax-wasmsimd-ld128.c:118 const v128_t vprod1x3 = wasm_i16x8_mul(vxb3, vxa1); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128() local
119 vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_low_i16x8(vprod1x3)); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
126 vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_high_i16x8(vprod1x3)); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
3x4c8-minmax-wasmsimd-ld64.c:118 const v128_t vprod1x3 = wasm_i16x8_mul(vxa1, vxb3); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64() local
119 vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_low_i16x8(vprod1x3)); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
120 vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_high_i16x8(vprod1x3)); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
3x8c16-minmax-neon-mlal-padal.c:125 int16x8_t vprod1x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
128 vprod1x3 = vmlal_s8(vprod1x3, vget_high_s8(vb3), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
131 vacc1x3 = vpadalq_s16(vacc1x3, vprod1x3); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
4x8c8-minmax-neon-mlal-padal.c:157 int16x8_t vprod1x3 = vmull_s8(vb3x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
161 vprod1x3 = vmlal_s8(vprod1x3, vb3x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
165 vacc1x3 = vpadalq_s16(vacc1x3, vprod1x3); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
260 const int16x8_t vprod1x3 = vmull_s8(vb3, va1); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
264 vacc1x3 = vpadalq_s16(vacc1x3, vprod1x3); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
2x16c8-minmax-neon-mlal-padal.c:131 int16x8_t vprod1x3 = vmull_s8(vb3x0, va1x0); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
133 vprod1x3 = vmlal_s8(vprod1x3, vb3x1, va1x1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
135 vacc1x3 = vpadalq_s16(vacc1x3, vprod1x3); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
246 const int16x8_t vprod1x3 = vmull_s8(vb3, va1); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
248 vacc1x3 = vpadalq_s16(vacc1x3, vprod1x3); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
2x16c16-minmax-neon-mlal-padal.c:125 int16x8_t vprod1x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() local
127 vprod1x3 = vmlal_s8(vprod1x3, vget_high_s8(vb3), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
129 vacc1x3 = vpadalq_s16(vacc1x3, vprod1x3); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
4x8c16-minmax-neon-mlal-padal.c:149 int16x8_t vprod1x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
153 vprod1x3 = vmlal_s8(vprod1x3, vget_high_s8(vb3), vget_high_s8(va1)); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
157 vacc1x3 = vpadalq_s16(vacc1x3, vprod1x3); in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
/external/XNNPACK/src/qs8-igemm/gen/
2x8c8-minmax-neon-mlal-padal.c:120 int16x8_t vprod1x3 = vmull_s8(vb3x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
122 vprod1x3 = vmlal_s8(vprod1x3, vb3x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
124 vacc1x3 = vpadalq_s16(vacc1x3, vprod1x3); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
179 const int16x8_t vprod1x3 = vmull_s8(vb3, va1); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
181 vacc1x3 = vpadalq_s16(vacc1x3, vprod1x3); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
2x4c8-minmax-wasmsimd-ld128.c:111 const v128_t vprod1x3 = wasm_i16x8_mul(vxb3, vxa1); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128() local
112 vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_low_i16x8(vprod1x3)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
116 vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_high_i16x8(vprod1x3)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
2x4c8-minmax-wasmsimd-ld64.c:110 const v128_t vprod1x3 = wasm_i16x8_mul(vxa1, vxb3); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64() local
111 vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_low_i16x8(vprod1x3)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
112 vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_high_i16x8(vprod1x3)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
2x8c16-minmax-neon-mlal-padal.c:114 int16x8_t vprod1x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal() local
116 vprod1x3 = vmlal_s8(vprod1x3, vget_high_s8(vb3), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
118 vacc1x3 = vpadalq_s16(vacc1x3, vprod1x3); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
3x4c8-minmax-wasmsimd-ld128.c:133 const v128_t vprod1x3 = wasm_i16x8_mul(vxb3, vxa1); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128() local
134 vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_low_i16x8(vprod1x3)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
141 vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_high_i16x8(vprod1x3)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
3x4c8-minmax-wasmsimd-ld64.c:133 const v128_t vprod1x3 = wasm_i16x8_mul(vxa1, vxb3); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64() local
134 vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_low_i16x8(vprod1x3)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
135 vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_high_i16x8(vprod1x3)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
3x8c8-minmax-neon-mlal-padal.c:147 int16x8_t vprod1x3 = vmull_s8(vb3x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
150 vprod1x3 = vmlal_s8(vprod1x3, vb3x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
153 vacc1x3 = vpadalq_s16(vacc1x3, vprod1x3); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
228 const int16x8_t vprod1x3 = vmull_s8(vb3, va1); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
231 vacc1x3 = vpadalq_s16(vacc1x3, vprod1x3); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
3x8c16-minmax-neon-mlal-padal.c:140 int16x8_t vprod1x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
143 vprod1x3 = vmlal_s8(vprod1x3, vget_high_s8(vb3), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
146 vacc1x3 = vpadalq_s16(vacc1x3, vprod1x3); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
2x16c8-minmax-neon-mlal-padal.c:144 int16x8_t vprod1x3 = vmull_s8(vb3x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
146 vprod1x3 = vmlal_s8(vprod1x3, vb3x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
148 vacc1x3 = vpadalq_s16(vacc1x3, vprod1x3); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
259 const int16x8_t vprod1x3 = vmull_s8(vb3, va1); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
261 vacc1x3 = vpadalq_s16(vacc1x3, vprod1x3); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
4x8c8-minmax-neon-mlal-padal.c:174 int16x8_t vprod1x3 = vmull_s8(vb3x0, va1x0); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
178 vprod1x3 = vmlal_s8(vprod1x3, vb3x1, va1x1); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
182 vacc1x3 = vpadalq_s16(vacc1x3, vprod1x3); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
277 const int16x8_t vprod1x3 = vmull_s8(vb3, va1); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
281 vacc1x3 = vpadalq_s16(vacc1x3, vprod1x3); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
4x8c16-minmax-neon-mlal-padal.c:166 int16x8_t vprod1x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
170 vprod1x3 = vmlal_s8(vprod1x3, vget_high_s8(vb3), vget_high_s8(va1)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
174 vacc1x3 = vpadalq_s16(vacc1x3, vprod1x3); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
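For orientation: in the NEON c8/c16 mlal-padal kernels above, vprod1x3 holds the widened int16 products of row 1 of the activation tile with weight column group 3, and those products are pairwise-accumulated into the int32 accumulator vacc1x3. The following is a minimal, self-contained sketch of that step, not the XNNPACK kernel itself; the helper name and standalone signature are illustrative only.

#include <arm_neon.h>
#include <stdint.h>

/* Sketch of one K-slice of the vprod1x3 / vacc1x3 update used by the
 * NEON c16 mlal-padal kernels listed above (illustrative helper, not
 * XNNPACK source). */
static int32x4_t neon_accumulate_row1_col3(int32x4_t vacc1x3,
                                           const int8_t a1[16],
                                           const int8_t b3[16]) {
  const int8x16_t va1 = vld1q_s8(a1);  /* 16 int8 activations from row 1 */
  const int8x16_t vb3 = vld1q_s8(b3);  /* 16 int8 weights from column group 3 */

  /* Widening multiply of the low halves: 8 x (int8 * int8) -> 8 x int16. */
  int16x8_t vprod1x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va1));
  /* Widening multiply-accumulate of the high halves into the same int16 lanes. */
  vprod1x3 = vmlal_s8(vprod1x3, vget_high_s8(vb3), vget_high_s8(va1));
  /* Pairwise-add adjacent int16 lanes, widening into the int32 accumulator. */
  return vpadalq_s16(vacc1x3, vprod1x3);
}

The c8 variants follow the same shape, except the two 8-byte halves come from separate loads (va1x0/va1x1 and vb3x0/vb3x1), and their remainder path feeds a single vmull_s8 product straight into vpadalq_s16.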
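The WAsm SIMD 4c8 kernels above do the same work on 16-bit lanes that were sign-extended from int8, so the 16-bit products are exact and only need widening before accumulation. Below is a minimal sketch under that assumption; the helper name is illustrative, and note that these generated files use the older wasm_i32x4_widen_*_i16x8 intrinsic spelling, which newer wasm_simd128.h headers rename to wasm_i32x4_extend_*_i16x8.

#include <wasm_simd128.h>

/* Sketch of one vprod1x3 / vacc1x3 update of the 4c8 WAsm SIMD kernels
 * listed above (illustrative helper, not XNNPACK source). vxa1 and vxb3
 * are assumed to hold eight int16 lanes sign-extended from int8 data. */
static v128_t wasmsimd_accumulate_row1_col3(v128_t vacc1x3, v128_t vxa1, v128_t vxb3) {
  /* 8 x int16 multiply; exact because both operands originate from int8. */
  const v128_t vprod1x3 = wasm_i16x8_mul(vxa1, vxb3);
  /* Widen the low and high product halves to int32 and add into the accumulator. */
  vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_low_i16x8(vprod1x3));
  vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_high_i16x8(vprod1x3));
  return vacc1x3;
}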
