Home
Sort by: last modified time | relevance | path

Searched refs:vprod0x2 (Results 1 – 25 of 63) sorted by relevance

Pages: 1 2 3

/external/XNNPACK/src/qs8-gemm/gen/
D1x8c8-minmax-neon-mlal-padal.c77 int16x8_t vprod0x2 = vmull_s8(vb2x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal() local
78 vprod0x2 = vmlal_s8(vprod0x2, vb2x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal()
79 vacc0x2 = vpadalq_s16(vacc0x2, vprod0x2); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal()
115 const int16x8_t vprod0x2 = vmull_s8(vb2, va0); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal() local
116 vacc0x2 = vpadalq_s16(vacc0x2, vprod0x2); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal()
D1x4c8-minmax-wasmsimd-ld64.c68 const v128_t vprod0x2 = wasm_i16x8_mul(vxa0, vxb2); in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64() local
69 vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2)); in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64()
70 vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2)); in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64()
D1x4c8-minmax-wasmsimd-ld128.c72 const v128_t vprod0x2 = wasm_i16x8_mul(vxb2, vxa0); in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128() local
73 vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2)); in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
77 vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2)); in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
D1x4c8-xw-minmax-wasmsimd.c68 const v128_t vprod0x2 = wasm_i16x8_mul(vxa0, vxb2); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd() local
69 vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2)); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd()
70 vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2)); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd()
D1x8c16-minmax-neon-mlal-padal.c73 int16x8_t vprod0x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal() local
74 vprod0x2 = vmlal_s8(vprod0x2, vget_high_s8(vb2), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal()
75 vacc0x2 = vpadalq_s16(vacc0x2, vprod0x2); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal()
D2x8c8-minmax-neon-mlal-padal.c99 int16x8_t vprod0x2 = vmull_s8(vb2x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
101 vprod0x2 = vmlal_s8(vprod0x2, vb2x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
103 vacc0x2 = vpadalq_s16(vacc0x2, vprod0x2); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
160 const int16x8_t vprod0x2 = vmull_s8(vb2, va0); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
162 vacc0x2 = vpadalq_s16(vacc0x2, vprod0x2); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
D2x4c8-xw-minmax-wasmsimd.c86 const v128_t vprod0x2 = wasm_i16x8_mul(vxa0, vxb2); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd() local
87 vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2)); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd()
88 vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2)); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd()
D2x4c8-minmax-wasmsimd-ld128.c90 const v128_t vprod0x2 = wasm_i16x8_mul(vxb2, vxa0); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128() local
91 vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
97 vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
D2x4c8-minmax-wasmsimd-ld64.c86 const v128_t vprod0x2 = wasm_i16x8_mul(vxa0, vxb2); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64() local
87 vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
88 vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
D1x16c8-minmax-neon-mlal-padal.c93 int16x8_t vprod0x2 = vmull_s8(vb2x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
94 vprod0x2 = vmlal_s8(vprod0x2, vb2x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
95 vacc0x2 = vpadalq_s16(vacc0x2, vprod0x2); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
163 const int16x8_t vprod0x2 = vmull_s8(vb2, va0); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
164 vacc0x2 = vpadalq_s16(vacc0x2, vprod0x2); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
D3x8c8-minmax-neon-mlal-padal.c121 int16x8_t vprod0x2 = vmull_s8(vb2x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
124 vprod0x2 = vmlal_s8(vprod0x2, vb2x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
127 vacc0x2 = vpadalq_s16(vacc0x2, vprod0x2); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
205 const int16x8_t vprod0x2 = vmull_s8(vb2, va0); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
208 vacc0x2 = vpadalq_s16(vacc0x2, vprod0x2); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
D2x8c16-minmax-neon-mlal-padal.c94 int16x8_t vprod0x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal() local
96 vprod0x2 = vmlal_s8(vprod0x2, vget_high_s8(vb2), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
98 vacc0x2 = vpadalq_s16(vacc0x2, vprod0x2); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
D3x4c8-xw-minmax-wasmsimd.c104 const v128_t vprod0x2 = wasm_i16x8_mul(vxa0, vxb2); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd() local
105 vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2)); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()
106 vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2)); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()
D3x4c8-minmax-wasmsimd-ld128.c108 const v128_t vprod0x2 = wasm_i16x8_mul(vxb2, vxa0); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128() local
109 vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2)); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
117 vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2)); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
/external/XNNPACK/src/qs8-igemm/gen/
D1x8c8-minmax-neon-mlal-padal.c88 int16x8_t vprod0x2 = vmull_s8(vb2x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal() local
89 vprod0x2 = vmlal_s8(vprod0x2, vb2x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal()
90 vacc0x2 = vpadalq_s16(vacc0x2, vprod0x2); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal()
126 const int16x8_t vprod0x2 = vmull_s8(vb2, va0); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal() local
127 vacc0x2 = vpadalq_s16(vacc0x2, vprod0x2); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal()
D1x4c8-minmax-wasmsimd-ld64.c79 const v128_t vprod0x2 = wasm_i16x8_mul(vxa0, vxb2); in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64() local
80 vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2)); in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64()
81 vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2)); in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64()
D1x4c8-minmax-wasmsimd-ld128.c83 const v128_t vprod0x2 = wasm_i16x8_mul(vxb2, vxa0); in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128() local
84 vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2)); in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
88 vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2)); in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
D1x8c16-minmax-neon-mlal-padal.c84 int16x8_t vprod0x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal() local
85 vprod0x2 = vmlal_s8(vprod0x2, vget_high_s8(vb2), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal()
86 vacc0x2 = vpadalq_s16(vacc0x2, vprod0x2); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal()
D2x8c8-minmax-neon-mlal-padal.c112 int16x8_t vprod0x2 = vmull_s8(vb2x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
114 vprod0x2 = vmlal_s8(vprod0x2, vb2x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
116 vacc0x2 = vpadalq_s16(vacc0x2, vprod0x2); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
173 const int16x8_t vprod0x2 = vmull_s8(vb2, va0); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
175 vacc0x2 = vpadalq_s16(vacc0x2, vprod0x2); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
D2x4c8-minmax-wasmsimd-ld128.c103 const v128_t vprod0x2 = wasm_i16x8_mul(vxb2, vxa0); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128() local
104 vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
110 vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
D2x4c8-minmax-wasmsimd-ld64.c99 const v128_t vprod0x2 = wasm_i16x8_mul(vxa0, vxb2); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64() local
100 vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
101 vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
D1x16c8-minmax-neon-mlal-padal.c104 int16x8_t vprod0x2 = vmull_s8(vb2x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
105 vprod0x2 = vmlal_s8(vprod0x2, vb2x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
106 vacc0x2 = vpadalq_s16(vacc0x2, vprod0x2); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
174 const int16x8_t vprod0x2 = vmull_s8(vb2, va0); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
175 vacc0x2 = vpadalq_s16(vacc0x2, vprod0x2); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
D2x8c16-minmax-neon-mlal-padal.c107 int16x8_t vprod0x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal() local
109 vprod0x2 = vmlal_s8(vprod0x2, vget_high_s8(vb2), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
111 vacc0x2 = vpadalq_s16(vacc0x2, vprod0x2); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
D3x4c8-minmax-wasmsimd-ld128.c123 const v128_t vprod0x2 = wasm_i16x8_mul(vxb2, vxa0); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128() local
124 vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
132 vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
D3x4c8-minmax-wasmsimd-ld64.c119 const v128_t vprod0x2 = wasm_i16x8_mul(vxa0, vxb2); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64() local
120 vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
121 vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64()

Pages: 1 2 3