
Searched refs:vprod1x2 (Results 1 – 25 of 46) sorted by relevance


/external/XNNPACK/src/qs8-gemm/gen/
2x8c8-minmax-neon-mlal-padal.c  (in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal)
    100: int16x8_t vprod1x2 = vmull_s8(vb2x0, va1x0);   [local]
    102: vprod1x2 = vmlal_s8(vprod1x2, vb2x1, va1x1);
    104: vacc1x2 = vpadalq_s16(vacc1x2, vprod1x2);
    161: const int16x8_t vprod1x2 = vmull_s8(vb2, va1);   [local]
    163: vacc1x2 = vpadalq_s16(vacc1x2, vprod1x2);

2x4c8-xw-minmax-wasmsimd.c  (in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd)
    89: const v128_t vprod1x2 = wasm_i16x8_mul(vxa1, vxb2);   [local]
    90: vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_low_i16x8(vprod1x2));
    91: vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_high_i16x8(vprod1x2));

2x4c8-minmax-wasmsimd-ld128.c  (in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128)
    92: const v128_t vprod1x2 = wasm_i16x8_mul(vxb2, vxa1);   [local]
    93: vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_low_i16x8(vprod1x2));
    100: vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_high_i16x8(vprod1x2));

2x4c8-minmax-wasmsimd-ld64.c  (in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64)
    89: const v128_t vprod1x2 = wasm_i16x8_mul(vxa1, vxb2);   [local]
    90: vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_low_i16x8(vprod1x2));
    91: vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_high_i16x8(vprod1x2));

3x8c8-minmax-neon-mlal-padal.c  (in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal)
    122: int16x8_t vprod1x2 = vmull_s8(vb2x0, va1x0);   [local]
    125: vprod1x2 = vmlal_s8(vprod1x2, vb2x1, va1x1);
    128: vacc1x2 = vpadalq_s16(vacc1x2, vprod1x2);
    206: const int16x8_t vprod1x2 = vmull_s8(vb2, va1);   [local]
    209: vacc1x2 = vpadalq_s16(vacc1x2, vprod1x2);

2x8c16-minmax-neon-mlal-padal.c  (in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal)
    95: int16x8_t vprod1x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va1));   [local]
    97: vprod1x2 = vmlal_s8(vprod1x2, vget_high_s8(vb2), vget_high_s8(va1));
    99: vacc1x2 = vpadalq_s16(vacc1x2, vprod1x2);

3x4c8-xw-minmax-wasmsimd.c  (in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd)
    107: const v128_t vprod1x2 = wasm_i16x8_mul(vxa1, vxb2);   [local]
    108: vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_low_i16x8(vprod1x2));
    109: vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_high_i16x8(vprod1x2));

3x4c8-minmax-wasmsimd-ld128.c  (in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128)
    110: const v128_t vprod1x2 = wasm_i16x8_mul(vxb2, vxa1);   [local]
    111: vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_low_i16x8(vprod1x2));
    120: vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_high_i16x8(vprod1x2));

3x4c8-minmax-wasmsimd-ld64.c  (in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64)
    107: const v128_t vprod1x2 = wasm_i16x8_mul(vxa1, vxb2);   [local]
    108: vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_low_i16x8(vprod1x2));
    109: vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_high_i16x8(vprod1x2));

3x8c16-minmax-neon-mlal-padal.c  (in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal)
    116: int16x8_t vprod1x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va1));   [local]
    119: vprod1x2 = vmlal_s8(vprod1x2, vget_high_s8(vb2), vget_high_s8(va1));
    122: vacc1x2 = vpadalq_s16(vacc1x2, vprod1x2);

4x8c8-minmax-neon-mlal-padal.c  (in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal)
    144: int16x8_t vprod1x2 = vmull_s8(vb2x0, va1x0);   [local]
    148: vprod1x2 = vmlal_s8(vprod1x2, vb2x1, va1x1);
    152: vacc1x2 = vpadalq_s16(vacc1x2, vprod1x2);
    251: const int16x8_t vprod1x2 = vmull_s8(vb2, va1);   [local]
    255: vacc1x2 = vpadalq_s16(vacc1x2, vprod1x2);

2x16c8-minmax-neon-mlal-padal.c  (in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal)
    124: int16x8_t vprod1x2 = vmull_s8(vb2x0, va1x0);   [local]
    126: vprod1x2 = vmlal_s8(vprod1x2, vb2x1, va1x1);
    128: vacc1x2 = vpadalq_s16(vacc1x2, vprod1x2);
    241: const int16x8_t vprod1x2 = vmull_s8(vb2, va1);   [local]
    243: vacc1x2 = vpadalq_s16(vacc1x2, vprod1x2);

2x16c16-minmax-neon-mlal-padal.c  (in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal)
    119: int16x8_t vprod1x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va1));   [local]
    121: vprod1x2 = vmlal_s8(vprod1x2, vget_high_s8(vb2), vget_high_s8(va1));
    123: vacc1x2 = vpadalq_s16(vacc1x2, vprod1x2);

4x8c16-minmax-neon-mlal-padal.c  (in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal)
    137: int16x8_t vprod1x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va1));   [local]
    141: vprod1x2 = vmlal_s8(vprod1x2, vget_high_s8(vb2), vget_high_s8(va1));
    145: vacc1x2 = vpadalq_s16(vacc1x2, vprod1x2);
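
All of the neon-mlal-padal hits above share one inner-loop idiom: multiply signed 8-bit lanes into 16-bit products (vmull_s8), fold the second half of the inputs into the same products (vmlal_s8), then pairwise-add adjacent 16-bit lanes into the 32-bit accumulator (vpadalq_s16). A minimal sketch of that idiom in isolation; the helper and variable names are invented here, and the generated kernels inline this sequence for every row/column pair rather than calling a function:

    #include <arm_neon.h>

    // Accumulate a 16-element signed-8-bit product block into four
    // 32-bit lanes, mirroring the c16 kernels' vprod1x2 sequence.
    static inline int32x4_t acc_dot_s8x16(int32x4_t vacc,
                                          int8x16_t va, int8x16_t vb) {
      // Low 8 lanes: int8 * int8 -> int16.
      int16x8_t vprod = vmull_s8(vget_low_s8(vb), vget_low_s8(va));
      // High 8 lanes: multiply-accumulate into the same 16-bit products.
      vprod = vmlal_s8(vprod, vget_high_s8(vb), vget_high_s8(va));
      // Pairwise-add adjacent int16 lanes into the int32 accumulator.
      return vpadalq_s16(vacc, vprod);
    }

The c8 kernels run the same sequence over two separately loaded 8-byte halves (va1x0/va1x1, vb2x0/vb2x1), and their remainder paths drop the vmlal_s8 step, which is why each file's second match is a bare vmull_s8 followed directly by vpadalq_s16.
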
/external/XNNPACK/src/qs8-igemm/gen/
2x8c8-minmax-neon-mlal-padal.c  (in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal)
    113: int16x8_t vprod1x2 = vmull_s8(vb2x0, va1x0);   [local]
    115: vprod1x2 = vmlal_s8(vprod1x2, vb2x1, va1x1);
    117: vacc1x2 = vpadalq_s16(vacc1x2, vprod1x2);
    174: const int16x8_t vprod1x2 = vmull_s8(vb2, va1);   [local]
    176: vacc1x2 = vpadalq_s16(vacc1x2, vprod1x2);

2x4c8-minmax-wasmsimd-ld128.c  (in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128)
    105: const v128_t vprod1x2 = wasm_i16x8_mul(vxb2, vxa1);   [local]
    106: vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_low_i16x8(vprod1x2));
    113: vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_high_i16x8(vprod1x2));

2x4c8-minmax-wasmsimd-ld64.c  (in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64)
    102: const v128_t vprod1x2 = wasm_i16x8_mul(vxa1, vxb2);   [local]
    103: vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_low_i16x8(vprod1x2));
    104: vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_high_i16x8(vprod1x2));

2x8c16-minmax-neon-mlal-padal.c  (in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal)
    108: int16x8_t vprod1x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va1));   [local]
    110: vprod1x2 = vmlal_s8(vprod1x2, vget_high_s8(vb2), vget_high_s8(va1));
    112: vacc1x2 = vpadalq_s16(vacc1x2, vprod1x2);

3x4c8-minmax-wasmsimd-ld128.c  (in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128)
    125: const v128_t vprod1x2 = wasm_i16x8_mul(vxb2, vxa1);   [local]
    126: vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_low_i16x8(vprod1x2));
    135: vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_high_i16x8(vprod1x2));

3x4c8-minmax-wasmsimd-ld64.c  (in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64)
    122: const v128_t vprod1x2 = wasm_i16x8_mul(vxa1, vxb2);   [local]
    123: vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_low_i16x8(vprod1x2));
    124: vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_high_i16x8(vprod1x2));

3x8c8-minmax-neon-mlal-padal.c  (in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal)
    137: int16x8_t vprod1x2 = vmull_s8(vb2x0, va1x0);   [local]
    140: vprod1x2 = vmlal_s8(vprod1x2, vb2x1, va1x1);
    143: vacc1x2 = vpadalq_s16(vacc1x2, vprod1x2);
    221: const int16x8_t vprod1x2 = vmull_s8(vb2, va1);   [local]
    224: vacc1x2 = vpadalq_s16(vacc1x2, vprod1x2);

3x8c16-minmax-neon-mlal-padal.c  (in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal)
    131: int16x8_t vprod1x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va1));   [local]
    134: vprod1x2 = vmlal_s8(vprod1x2, vget_high_s8(vb2), vget_high_s8(va1));
    137: vacc1x2 = vpadalq_s16(vacc1x2, vprod1x2);

2x16c8-minmax-neon-mlal-padal.c  (in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal)
    137: int16x8_t vprod1x2 = vmull_s8(vb2x0, va1x0);   [local]
    139: vprod1x2 = vmlal_s8(vprod1x2, vb2x1, va1x1);
    141: vacc1x2 = vpadalq_s16(vacc1x2, vprod1x2);
    254: const int16x8_t vprod1x2 = vmull_s8(vb2, va1);   [local]
    256: vacc1x2 = vpadalq_s16(vacc1x2, vprod1x2);

4x8c8-minmax-neon-mlal-padal.c  (in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal)
    161: int16x8_t vprod1x2 = vmull_s8(vb2x0, va1x0);   [local]
    165: vprod1x2 = vmlal_s8(vprod1x2, vb2x1, va1x1);
    169: vacc1x2 = vpadalq_s16(vacc1x2, vprod1x2);
    268: const int16x8_t vprod1x2 = vmull_s8(vb2, va1);   [local]
    272: vacc1x2 = vpadalq_s16(vacc1x2, vprod1x2);

4x8c16-minmax-neon-mlal-padal.c  (in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal)
    154: int16x8_t vprod1x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va1));   [local]
    158: vprod1x2 = vmlal_s8(vprod1x2, vget_high_s8(vb2), vget_high_s8(va1));
    162: vacc1x2 = vpadalq_s16(vacc1x2, vprod1x2);
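
The wasmsimd hits follow an analogous widening pattern: by the time vprod1x2 is formed, the 8-bit inputs have already been sign-extended into eight 16-bit lanes (vxa1, vxb2), so a plain 16-bit multiply is exact (each product is at most 128 * 128 = 16384 in magnitude, which fits in int16), and the products are widened to 32 bits one half at a time and added to the accumulator. A minimal sketch under those assumptions; the helper name is invented, and note that newer wasm_simd128.h headers spell the widen_* intrinsics extend_* instead:

    #include <wasm_simd128.h>

    // Accumulate eight sign-extended 8-bit products into four 32-bit
    // lanes, mirroring the wasmsimd kernels' vprod1x2 sequence.
    static inline v128_t acc_dot_i16x8(v128_t vacc, v128_t vxa, v128_t vxb) {
      // Eight int16 * int16 multiplies; each product fits in 16 bits.
      const v128_t vprod = wasm_i16x8_mul(vxa, vxb);
      // Sign-extend product lanes 0-3 to int32 and accumulate.
      vacc = wasm_i32x4_add(vacc, wasm_i32x4_widen_low_i16x8(vprod));
      // Sign-extend product lanes 4-7 to int32 and accumulate.
      return wasm_i32x4_add(vacc, wasm_i32x4_widen_high_i16x8(vprod));
    }

The ld64/ld128/xw suffixes differ only in how the 8-bit data is loaded and widened before this point; the multiply/widen/add sequence on vprod1x2 is identical across them, with the ld128 variants merely scheduling the widen_high add a few source lines later.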
