Home
last modified time | relevance | path

Searched refs:vprod0x1 (Results 1 – 25 of 63) sorted by relevance

123

/external/XNNPACK/src/qs8-gemm/gen/
D1x8c8-minmax-neon-mlal-padal.c73 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal() local
74 vprod0x1 = vmlal_s8(vprod0x1, vb1x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal()
75 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal()
112 const int16x8_t vprod0x1 = vmull_s8(vb1, va0); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal() local
113 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal()
D1x4c8-minmax-wasmsimd-ld64.c63 const v128_t vprod0x1 = wasm_i16x8_mul(vxa0, vxb1); in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64() local
64 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1)); in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64()
65 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1)); in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64()
D1x4c8-minmax-wasmsimd-ld128.c63 const v128_t vprod0x1 = wasm_i16x8_mul(vxb1, vxa0); in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128() local
64 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1)); in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
67 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1)); in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
D1x4c8-xw-minmax-wasmsimd.c63 const v128_t vprod0x1 = wasm_i16x8_mul(vxa0, vxb1); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd() local
64 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1)); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd()
65 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1)); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd()
D1x8c16-minmax-neon-mlal-padal.c70 int16x8_t vprod0x1 = vmull_s8(vget_low_s8(vb1), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal() local
71 vprod0x1 = vmlal_s8(vprod0x1, vget_high_s8(vb1), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal()
72 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal()
D2x8c8-minmax-neon-mlal-padal.c92 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
94 vprod0x1 = vmlal_s8(vprod0x1, vb1x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
96 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
155 const int16x8_t vprod0x1 = vmull_s8(vb1, va0); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
157 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
D2x4c8-xw-minmax-wasmsimd.c78 const v128_t vprod0x1 = wasm_i16x8_mul(vxa0, vxb1); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd() local
79 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1)); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd()
80 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1)); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd()
D2x4c8-minmax-wasmsimd-ld128.c77 const v128_t vprod0x1 = wasm_i16x8_mul(vxb1, vxa0); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128() local
78 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
84 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
D2x4c8-minmax-wasmsimd-ld64.c78 const v128_t vprod0x1 = wasm_i16x8_mul(vxa0, vxb1); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64() local
79 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
80 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
D1x16c8-minmax-neon-mlal-padal.c89 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
90 vprod0x1 = vmlal_s8(vprod0x1, vb1x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
91 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
160 const int16x8_t vprod0x1 = vmull_s8(vb1, va0); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
161 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
D3x8c8-minmax-neon-mlal-padal.c111 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
114 vprod0x1 = vmlal_s8(vprod0x1, vb1x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
117 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
198 const int16x8_t vprod0x1 = vmull_s8(vb1, va0); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
201 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
D2x8c16-minmax-neon-mlal-padal.c88 int16x8_t vprod0x1 = vmull_s8(vget_low_s8(vb1), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal() local
90 vprod0x1 = vmlal_s8(vprod0x1, vget_high_s8(vb1), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
92 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
D3x4c8-xw-minmax-wasmsimd.c93 const v128_t vprod0x1 = wasm_i16x8_mul(vxa0, vxb1); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd() local
94 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1)); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()
95 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1)); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()
D3x4c8-minmax-wasmsimd-ld128.c91 const v128_t vprod0x1 = wasm_i16x8_mul(vxb1, vxa0); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128() local
92 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1)); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
101 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1)); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
/external/XNNPACK/src/qs8-igemm/gen/
D1x8c8-minmax-neon-mlal-padal.c84 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal() local
85 vprod0x1 = vmlal_s8(vprod0x1, vb1x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal()
86 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal()
123 const int16x8_t vprod0x1 = vmull_s8(vb1, va0); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal() local
124 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal()
D1x4c8-minmax-wasmsimd-ld64.c74 const v128_t vprod0x1 = wasm_i16x8_mul(vxa0, vxb1); in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64() local
75 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1)); in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64()
76 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1)); in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64()
D1x4c8-minmax-wasmsimd-ld128.c74 const v128_t vprod0x1 = wasm_i16x8_mul(vxb1, vxa0); in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128() local
75 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1)); in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
78 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1)); in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
D1x8c16-minmax-neon-mlal-padal.c81 int16x8_t vprod0x1 = vmull_s8(vget_low_s8(vb1), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal() local
82 vprod0x1 = vmlal_s8(vprod0x1, vget_high_s8(vb1), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal()
83 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal()
D2x8c8-minmax-neon-mlal-padal.c105 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
107 vprod0x1 = vmlal_s8(vprod0x1, vb1x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
109 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
168 const int16x8_t vprod0x1 = vmull_s8(vb1, va0); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
170 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
D2x4c8-minmax-wasmsimd-ld128.c90 const v128_t vprod0x1 = wasm_i16x8_mul(vxb1, vxa0); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128() local
91 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
97 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
D2x4c8-minmax-wasmsimd-ld64.c91 const v128_t vprod0x1 = wasm_i16x8_mul(vxa0, vxb1); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64() local
92 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
93 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
D1x16c8-minmax-neon-mlal-padal.c100 int16x8_t vprod0x1 = vmull_s8(vb1x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
101 vprod0x1 = vmlal_s8(vprod0x1, vb1x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
102 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
171 const int16x8_t vprod0x1 = vmull_s8(vb1, va0); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
172 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
D2x8c16-minmax-neon-mlal-padal.c101 int16x8_t vprod0x1 = vmull_s8(vget_low_s8(vb1), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal() local
103 vprod0x1 = vmlal_s8(vprod0x1, vget_high_s8(vb1), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
105 vacc0x1 = vpadalq_s16(vacc0x1, vprod0x1); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
D3x4c8-minmax-wasmsimd-ld128.c106 const v128_t vprod0x1 = wasm_i16x8_mul(vxb1, vxa0); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128() local
107 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
116 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
D3x4c8-minmax-wasmsimd-ld64.c108 const v128_t vprod0x1 = wasm_i16x8_mul(vxa0, vxb1); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64() local
109 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
110 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64()

123