Searched refs:vprod1x1 (Results 1 – 25 of 46) sorted by relevance

/external/XNNPACK/src/qs8-gemm/gen/
2x8c8-minmax-neon-mlal-padal.c  (in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal)
  93:  int16x8_t vprod1x1 = vmull_s8(vb1x0, va1x0);  [local]
  95:  vprod1x1 = vmlal_s8(vprod1x1, vb1x1, va1x1);
  97:  vacc1x1 = vpadalq_s16(vacc1x1, vprod1x1);
  156: const int16x8_t vprod1x1 = vmull_s8(vb1, va1);  [local]
  158: vacc1x1 = vpadalq_s16(vacc1x1, vprod1x1);

2x4c8-xw-minmax-wasmsimd.c  (in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd)
  81:  const v128_t vprod1x1 = wasm_i16x8_mul(vxa1, vxb1);  [local]
  82:  vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_low_i16x8(vprod1x1));
  83:  vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_high_i16x8(vprod1x1));

2x4c8-minmax-wasmsimd-ld128.c  (in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128)
  80:  const v128_t vprod1x1 = wasm_i16x8_mul(vxb1, vxa1);  [local]
  81:  vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_low_i16x8(vprod1x1));
  85:  vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_high_i16x8(vprod1x1));

2x4c8-minmax-wasmsimd-ld64.c  (in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64)
  81:  const v128_t vprod1x1 = wasm_i16x8_mul(vxa1, vxb1);  [local]
  82:  vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_low_i16x8(vprod1x1));
  83:  vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_high_i16x8(vprod1x1));

3x8c8-minmax-neon-mlal-padal.c  (in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal)
  112: int16x8_t vprod1x1 = vmull_s8(vb1x0, va1x0);  [local]
  115: vprod1x1 = vmlal_s8(vprod1x1, vb1x1, va1x1);
  118: vacc1x1 = vpadalq_s16(vacc1x1, vprod1x1);
  199: const int16x8_t vprod1x1 = vmull_s8(vb1, va1);  [local]
  202: vacc1x1 = vpadalq_s16(vacc1x1, vprod1x1);

2x8c16-minmax-neon-mlal-padal.c  (in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal)
  89:  int16x8_t vprod1x1 = vmull_s8(vget_low_s8(vb1), vget_low_s8(va1));  [local]
  91:  vprod1x1 = vmlal_s8(vprod1x1, vget_high_s8(vb1), vget_high_s8(va1));
  93:  vacc1x1 = vpadalq_s16(vacc1x1, vprod1x1);

3x4c8-xw-minmax-wasmsimd.c  (in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd)
  96:  const v128_t vprod1x1 = wasm_i16x8_mul(vxa1, vxb1);  [local]
  97:  vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_low_i16x8(vprod1x1));
  98:  vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_high_i16x8(vprod1x1));

3x4c8-minmax-wasmsimd-ld128.c  (in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128)
  94:  const v128_t vprod1x1 = wasm_i16x8_mul(vxb1, vxa1);  [local]
  95:  vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_low_i16x8(vprod1x1));
  102: vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_high_i16x8(vprod1x1));

3x4c8-minmax-wasmsimd-ld64.c  (in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64)
  96:  const v128_t vprod1x1 = wasm_i16x8_mul(vxa1, vxb1);  [local]
  97:  vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_low_i16x8(vprod1x1));
  98:  vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_high_i16x8(vprod1x1));

3x8c16-minmax-neon-mlal-padal.c  (in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal)
  107: int16x8_t vprod1x1 = vmull_s8(vget_low_s8(vb1), vget_low_s8(va1));  [local]
  110: vprod1x1 = vmlal_s8(vprod1x1, vget_high_s8(vb1), vget_high_s8(va1));
  113: vacc1x1 = vpadalq_s16(vacc1x1, vprod1x1);

4x8c8-minmax-neon-mlal-padal.c  (in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal)
  131: int16x8_t vprod1x1 = vmull_s8(vb1x0, va1x0);  [local]
  135: vprod1x1 = vmlal_s8(vprod1x1, vb1x1, va1x1);
  139: vacc1x1 = vpadalq_s16(vacc1x1, vprod1x1);
  242: const int16x8_t vprod1x1 = vmull_s8(vb1, va1);  [local]
  246: vacc1x1 = vpadalq_s16(vacc1x1, vprod1x1);

2x16c8-minmax-neon-mlal-padal.c  (in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal)
  117: int16x8_t vprod1x1 = vmull_s8(vb1x0, va1x0);  [local]
  119: vprod1x1 = vmlal_s8(vprod1x1, vb1x1, va1x1);
  121: vacc1x1 = vpadalq_s16(vacc1x1, vprod1x1);
  236: const int16x8_t vprod1x1 = vmull_s8(vb1, va1);  [local]
  238: vacc1x1 = vpadalq_s16(vacc1x1, vprod1x1);

2x16c16-minmax-neon-mlal-padal.c  (in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal)
  113: int16x8_t vprod1x1 = vmull_s8(vget_low_s8(vb1), vget_low_s8(va1));  [local]
  115: vprod1x1 = vmlal_s8(vprod1x1, vget_high_s8(vb1), vget_high_s8(va1));
  117: vacc1x1 = vpadalq_s16(vacc1x1, vprod1x1);

4x8c16-minmax-neon-mlal-padal.c  (in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal)
  125: int16x8_t vprod1x1 = vmull_s8(vget_low_s8(vb1), vget_low_s8(va1));  [local]
  129: vprod1x1 = vmlal_s8(vprod1x1, vget_high_s8(vb1), vget_high_s8(va1));
  133: vacc1x1 = vpadalq_s16(vacc1x1, vprod1x1);
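
Every neon-mlal-padal hit in this listing (the qs8-gemm group above and the qs8-igemm group below) uses the same accumulation idiom for vprod1x1: a signed 8-bit widening multiply, a second multiply folded into the same 16-bit product register, then a pairwise add-accumulate into the 32-bit accumulator. The following is a minimal standalone sketch of that idiom, not XNNPACK code; the helper name dot16_s8 and the plain-pointer loads are illustrative assumptions, loosely mirroring how the c16 kernels split one 16-byte slice into low and high halves.

    #include <arm_neon.h>
    #include <stdint.h>

    /* Fold the products of one 16-byte slice of a and b into a 4-lane
     * 32-bit accumulator, the way the c16 kernels handle vprod1x1. */
    static int32x4_t dot16_s8(int32x4_t vacc, const int8_t *a, const int8_t *b) {
      const int8x16_t va = vld1q_s8(a);
      const int8x16_t vb = vld1q_s8(b);
      /* Eight low-half products, widened from 8-bit to 16-bit lanes. */
      int16x8_t vprod = vmull_s8(vget_low_s8(vb), vget_low_s8(va));
      /* Eight high-half products folded into the same 16-bit register.
       * Note: two -128 * -128 products in one lane would reach 32768 and wrap;
       * this sketch does not guard against that extreme case. */
      vprod = vmlal_s8(vprod, vget_high_s8(vb), vget_high_s8(va));
      /* Pairwise add adjacent 16-bit lanes and accumulate into 32-bit lanes. */
      return vpadalq_s16(vacc, vprod);
    }

Summing the four 32-bit lanes at the end (for example with vaddvq_s32 on AArch64) would yield the scalar dot product for that slice.
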
/external/XNNPACK/src/qs8-igemm/gen/
2x8c8-minmax-neon-mlal-padal.c  (in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal)
  106: int16x8_t vprod1x1 = vmull_s8(vb1x0, va1x0);  [local]
  108: vprod1x1 = vmlal_s8(vprod1x1, vb1x1, va1x1);
  110: vacc1x1 = vpadalq_s16(vacc1x1, vprod1x1);
  169: const int16x8_t vprod1x1 = vmull_s8(vb1, va1);  [local]
  171: vacc1x1 = vpadalq_s16(vacc1x1, vprod1x1);

2x4c8-minmax-wasmsimd-ld128.c  (in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128)
  93:  const v128_t vprod1x1 = wasm_i16x8_mul(vxb1, vxa1);  [local]
  94:  vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_low_i16x8(vprod1x1));
  98:  vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_high_i16x8(vprod1x1));

2x4c8-minmax-wasmsimd-ld64.c  (in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64)
  94:  const v128_t vprod1x1 = wasm_i16x8_mul(vxa1, vxb1);  [local]
  95:  vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_low_i16x8(vprod1x1));
  96:  vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_high_i16x8(vprod1x1));

2x8c16-minmax-neon-mlal-padal.c  (in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal)
  102: int16x8_t vprod1x1 = vmull_s8(vget_low_s8(vb1), vget_low_s8(va1));  [local]
  104: vprod1x1 = vmlal_s8(vprod1x1, vget_high_s8(vb1), vget_high_s8(va1));
  106: vacc1x1 = vpadalq_s16(vacc1x1, vprod1x1);

3x4c8-minmax-wasmsimd-ld128.c  (in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128)
  109: const v128_t vprod1x1 = wasm_i16x8_mul(vxb1, vxa1);  [local]
  110: vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_low_i16x8(vprod1x1));
  117: vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_high_i16x8(vprod1x1));

3x4c8-minmax-wasmsimd-ld64.c  (in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64)
  111: const v128_t vprod1x1 = wasm_i16x8_mul(vxa1, vxb1);  [local]
  112: vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_low_i16x8(vprod1x1));
  113: vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_high_i16x8(vprod1x1));

3x8c8-minmax-neon-mlal-padal.c  (in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal)
  127: int16x8_t vprod1x1 = vmull_s8(vb1x0, va1x0);  [local]
  130: vprod1x1 = vmlal_s8(vprod1x1, vb1x1, va1x1);
  133: vacc1x1 = vpadalq_s16(vacc1x1, vprod1x1);
  214: const int16x8_t vprod1x1 = vmull_s8(vb1, va1);  [local]
  217: vacc1x1 = vpadalq_s16(vacc1x1, vprod1x1);

3x8c16-minmax-neon-mlal-padal.c  (in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal)
  122: int16x8_t vprod1x1 = vmull_s8(vget_low_s8(vb1), vget_low_s8(va1));  [local]
  125: vprod1x1 = vmlal_s8(vprod1x1, vget_high_s8(vb1), vget_high_s8(va1));
  128: vacc1x1 = vpadalq_s16(vacc1x1, vprod1x1);

2x16c8-minmax-neon-mlal-padal.c  (in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal)
  130: int16x8_t vprod1x1 = vmull_s8(vb1x0, va1x0);  [local]
  132: vprod1x1 = vmlal_s8(vprod1x1, vb1x1, va1x1);
  134: vacc1x1 = vpadalq_s16(vacc1x1, vprod1x1);
  249: const int16x8_t vprod1x1 = vmull_s8(vb1, va1);  [local]
  251: vacc1x1 = vpadalq_s16(vacc1x1, vprod1x1);

4x8c8-minmax-neon-mlal-padal.c  (in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal)
  148: int16x8_t vprod1x1 = vmull_s8(vb1x0, va1x0);  [local]
  152: vprod1x1 = vmlal_s8(vprod1x1, vb1x1, va1x1);
  156: vacc1x1 = vpadalq_s16(vacc1x1, vprod1x1);
  259: const int16x8_t vprod1x1 = vmull_s8(vb1, va1);  [local]
  263: vacc1x1 = vpadalq_s16(vacc1x1, vprod1x1);

4x8c16-minmax-neon-mlal-padal.c  (in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal)
  142: int16x8_t vprod1x1 = vmull_s8(vget_low_s8(vb1), vget_low_s8(va1));  [local]
  146: vprod1x1 = vmlal_s8(vprod1x1, vget_high_s8(vb1), vget_high_s8(va1));
  150: vacc1x1 = vpadalq_s16(vacc1x1, vprod1x1);
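
The wasmsimd hits in both groups follow a parallel idiom in 16-bit arithmetic: the operands are int8 values already sign-extended to 16-bit lanes, vprod1x1 is their lane-wise product, and the low and high halves of the products are widened to 32 bits and added into the accumulator separately. Below is a minimal standalone sketch, not XNNPACK code; acc_mul_s16 is an illustrative name, and the wasm_i32x4_widen_* spellings match the hits above (older wasm_simd128.h headers), while newer toolchains spell them wasm_i32x4_extend_low_i16x8 / wasm_i32x4_extend_high_i16x8.

    #include <wasm_simd128.h>

    /* vxa and vxb each hold eight int16 lanes that were sign-extended from
     * int8, so every 16 x 16 product fits in 16 bits (|product| <= 16384). */
    static v128_t acc_mul_s16(v128_t vacc, v128_t vxa, v128_t vxb) {
      const v128_t vprod = wasm_i16x8_mul(vxa, vxb);
      /* Sign-extend lanes 0-3 of the 16-bit products to 32 bits and accumulate. */
      vacc = wasm_i32x4_add(vacc, wasm_i32x4_widen_low_i16x8(vprod));
      /* Sign-extend lanes 4-7 and accumulate. */
      vacc = wasm_i32x4_add(vacc, wasm_i32x4_widen_high_i16x8(vprod));
      return vacc;
    }

The ld64/ld128/xw variants in the listing differ only in how the 16-bit inputs are loaded and in operand order of the commutative multiply; the widen-then-add accumulation step is the same in all of them.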
