
Searched refs:vprod2x0 (Results 1 – 25 of 29) sorted by relevance

/external/XNNPACK/src/qs8-gemm/gen/
3x8c8-minmax-neon-mlal-padal.c
    103  int16x8_t vprod2x0 = vmull_s8(vb0x0, va2x0);  in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
    106  vprod2x0 = vmlal_s8(vprod2x0, vb0x1, va2x1);  in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
    109  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
    193  const int16x8_t vprod2x0 = vmull_s8(vb0, va2);  in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
    196  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()

3x4c8-xw-minmax-wasmsimd.c
    88  const v128_t vprod2x0 = wasm_i16x8_mul(vxa2, vxb0);  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd() local
    89  vacc2x0 = wasm_i32x4_add(vacc2x0, wasm_i32x4_widen_low_i16x8(vprod2x0));  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()
    90  vacc2x0 = wasm_i32x4_add(vacc2x0, wasm_i32x4_widen_high_i16x8(vprod2x0));  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()

3x4c8-minmax-wasmsimd-ld128.c
    88  const v128_t vprod2x0 = wasm_i16x8_mul(vxb0, vxa2);  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128() local
    89  vacc2x0 = wasm_i32x4_add(vacc2x0, wasm_i32x4_widen_low_i16x8(vprod2x0));  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
    99  vacc2x0 = wasm_i32x4_add(vacc2x0, wasm_i32x4_widen_high_i16x8(vprod2x0));  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()

3x4c8-minmax-wasmsimd-ld64.c
    88  const v128_t vprod2x0 = wasm_i16x8_mul(vxa2, vxb0);  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64() local
    89  vacc2x0 = wasm_i32x4_add(vacc2x0, wasm_i32x4_widen_low_i16x8(vprod2x0));  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
    90  vacc2x0 = wasm_i32x4_add(vacc2x0, wasm_i32x4_widen_high_i16x8(vprod2x0));  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64()

3x8c16-minmax-neon-mlal-padal.c
    99  int16x8_t vprod2x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va2));  in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
    102  vprod2x0 = vmlal_s8(vprod2x0, vget_high_s8(vb0), vget_high_s8(va2));  in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
    105  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()

4x8c8-minmax-neon-mlal-padal.c
    119  int16x8_t vprod2x0 = vmull_s8(vb0x0, va2x0);  in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
    123  vprod2x0 = vmlal_s8(vprod2x0, vb0x1, va2x1);  in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
    127  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
    234  const int16x8_t vprod2x0 = vmull_s8(vb0, va2);  in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
    238  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()

4x8c16-minmax-neon-mlal-padal.c
    114  int16x8_t vprod2x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va2));  in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
    118  vprod2x0 = vmlal_s8(vprod2x0, vget_high_s8(vb0), vget_high_s8(va2));  in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
    122  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()

3x16c8-minmax-neon-mlal-padal.c
    135  int16x8_t vprod2x0 = vmull_s8(vb0x0, va2x0);  in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
    138  vprod2x0 = vmlal_s8(vprod2x0, vb0x1, va2x1);  in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
    141  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
    305  const int16x8_t vprod2x0 = vmull_s8(vb0, va2);  in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
    308  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()

3x16c16-minmax-neon-mlal-padal.c
    131  int16x8_t vprod2x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va2));  in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
    134  vprod2x0 = vmlal_s8(vprod2x0, vget_high_s8(vb0), vget_high_s8(va2));  in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
    137  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()

4x16c8-minmax-neon-mlal-padal.c
    159  int16x8_t vprod2x0 = vmull_s8(vb0x0, va2x0);  in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
    163  vprod2x0 = vmlal_s8(vprod2x0, vb0x1, va2x1);  in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
    167  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
    378  const int16x8_t vprod2x0 = vmull_s8(vb0, va2);  in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
    382  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()

3x8c8-minmax-neon-mull-padal.c
    92  const int16x8_t vprod2x0 = vmull_s8(vb0, va2);  in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal() local
    95  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()

4x8c8-minmax-neon-mull-padal.c
    107  const int16x8_t vprod2x0 = vmull_s8(vb0, va2);  in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal() local
    111  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()

4x16c16-minmax-neon-mlal-padal.c
    154  int16x8_t vprod2x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va2));  in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local
    158  vprod2x0 = vmlal_s8(vprod2x0, vget_high_s8(vb0), vget_high_s8(va2));  in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
    162  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
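Note: every NEON hit above is one step of the same widening multiply-accumulate sequence that these generated QS8 GEMM microkernels share. A minimal sketch of that sequence follows, assuming a c16-style kernel (one 16-byte load per input); the helper name accumulate_c16 and its arguments are illustrative and are not XNNPACK code.

    #include <arm_neon.h>

    // Minimal sketch (not XNNPACK code): accumulate the dot product of two
    // 16-element signed 8-bit vectors into a 32-bit NEON accumulator, using
    // the same MULL -> MLAL -> PADAL sequence seen in the vprod2x0 hits above.
    static int32x4_t accumulate_c16(int32x4_t vacc, int8x16_t va, int8x16_t vb) {
      // Widening multiply of the low 8 lanes: int8 x int8 -> int16.
      int16x8_t vprod = vmull_s8(vget_low_s8(vb), vget_low_s8(va));
      // Multiply-accumulate the high 8 lanes into the same int16 products.
      vprod = vmlal_s8(vprod, vget_high_s8(vb), vget_high_s8(va));
      // Pairwise-add adjacent int16 products and accumulate into the int32 lanes.
      return vpadalq_s16(vacc, vprod);
    }

As the listed lines show, the c8 *-mlal variants reach the same result with two 8-byte loads per input (va2x0/va2x1, vb0x0/vb0x1) instead of one 16-byte load, and the *-mull variants stop after the vmull_s8 step.
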
/external/XNNPACK/src/qs8-igemm/gen/
3x4c8-minmax-wasmsimd-ld128.c
    103  const v128_t vprod2x0 = wasm_i16x8_mul(vxb0, vxa2);  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128() local
    104  vacc2x0 = wasm_i32x4_add(vacc2x0, wasm_i32x4_widen_low_i16x8(vprod2x0));  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
    114  vacc2x0 = wasm_i32x4_add(vacc2x0, wasm_i32x4_widen_high_i16x8(vprod2x0));  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()

3x4c8-minmax-wasmsimd-ld64.c
    103  const v128_t vprod2x0 = wasm_i16x8_mul(vxa2, vxb0);  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64() local
    104  vacc2x0 = wasm_i32x4_add(vacc2x0, wasm_i32x4_widen_low_i16x8(vprod2x0));  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
    105  vacc2x0 = wasm_i32x4_add(vacc2x0, wasm_i32x4_widen_high_i16x8(vprod2x0));  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64()

3x8c8-minmax-neon-mlal-padal.c
    118  int16x8_t vprod2x0 = vmull_s8(vb0x0, va2x0);  in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
    121  vprod2x0 = vmlal_s8(vprod2x0, vb0x1, va2x1);  in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
    124  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
    208  const int16x8_t vprod2x0 = vmull_s8(vb0, va2);  in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
    211  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()

3x8c16-minmax-neon-mlal-padal.c
    114  int16x8_t vprod2x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va2));  in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
    117  vprod2x0 = vmlal_s8(vprod2x0, vget_high_s8(vb0), vget_high_s8(va2));  in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
    120  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()

4x8c8-minmax-neon-mlal-padal.c
    136  int16x8_t vprod2x0 = vmull_s8(vb0x0, va2x0);  in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
    140  vprod2x0 = vmlal_s8(vprod2x0, vb0x1, va2x1);  in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
    144  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
    251  const int16x8_t vprod2x0 = vmull_s8(vb0, va2);  in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
    255  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()

4x8c16-minmax-neon-mlal-padal.c
    131  int16x8_t vprod2x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va2));  in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
    135  vprod2x0 = vmlal_s8(vprod2x0, vget_high_s8(vb0), vget_high_s8(va2));  in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
    139  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()

3x16c8-minmax-neon-mlal-padal.c
    150  int16x8_t vprod2x0 = vmull_s8(vb0x0, va2x0);  in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
    153  vprod2x0 = vmlal_s8(vprod2x0, vb0x1, va2x1);  in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
    156  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
    320  const int16x8_t vprod2x0 = vmull_s8(vb0, va2);  in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
    323  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()

3x16c16-minmax-neon-mlal-padal.c
    146  int16x8_t vprod2x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va2));  in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
    149  vprod2x0 = vmlal_s8(vprod2x0, vget_high_s8(vb0), vget_high_s8(va2));  in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
    152  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()

4x16c8-minmax-neon-mlal-padal.c
    176  int16x8_t vprod2x0 = vmull_s8(vb0x0, va2x0);  in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
    180  vprod2x0 = vmlal_s8(vprod2x0, vb0x1, va2x1);  in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
    184  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
    395  const int16x8_t vprod2x0 = vmull_s8(vb0, va2);  in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
    399  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()

3x8c8-minmax-neon-mull-padal.c
    107  const int16x8_t vprod2x0 = vmull_s8(vb0, va2);  in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal() local
    110  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()

4x8c8-minmax-neon-mull-padal.c
    124  const int16x8_t vprod2x0 = vmull_s8(vb0, va2);  in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal() local
    128  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()

4x16c16-minmax-neon-mlal-padal.c
    171  int16x8_t vprod2x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va2));  in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local
    175  vprod2x0 = vmlal_s8(vprod2x0, vget_high_s8(vb0), vget_high_s8(va2));  in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
    179  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
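Note: the wasmsimd hits in both directories follow the analogous pattern, with 16-bit products widened to 32 bits before accumulation. A minimal sketch follows; the helper accumulate_wasm and its arguments are illustrative, not XNNPACK code, and it is written against newer wasm_simd128.h headers, where the wasm_i32x4_widen_*_i16x8 intrinsics used by these generated files are spelled wasm_i32x4_extend_*_i16x8.

    #include <wasm_simd128.h>

    // Minimal sketch (not XNNPACK code): vxa and vxb hold sign-extended 16-bit
    // inputs; their 8 int16 products are widened to int32 and added into the
    // accumulator in two halves, as in the vprod2x0 hits above.
    static inline v128_t accumulate_wasm(v128_t vacc, v128_t vxa, v128_t vxb) {
      const v128_t vprod = wasm_i16x8_mul(vxa, vxb);                      // 8 x int16 products
      vacc = wasm_i32x4_add(vacc, wasm_i32x4_extend_low_i16x8(vprod));    // add low 4 products
      vacc = wasm_i32x4_add(vacc, wasm_i32x4_extend_high_i16x8(vprod));   // add high 4 products
      return vacc;
    }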
