Cross-reference hits for the local variable vprod2x0 in XNNPACK's generated QS8 GEMM and IGEMM microkernels. Each entry lists the matching source lines with their line numbers and the enclosing function; "local" marks the line that declares the variable.

/external/XNNPACK/src/qs8-gemm/gen/
D | 3x8c8-minmax-neon-mlal-padal.c |
    103  int16x8_t vprod2x0 = vmull_s8(vb0x0, va2x0);  in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
    106  vprod2x0 = vmlal_s8(vprod2x0, vb0x1, va2x1);  in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
    109  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
    193  const int16x8_t vprod2x0 = vmull_s8(vb0, va2);  in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
    196  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
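These hits show the MLAL+PADAL accumulation idiom shared by all of the neon-mlal-padal kernels in this listing: vmull_s8 widening-multiplies eight int8 lanes into int16, vmlal_s8 folds a second eight products into the same int16 vector, and vpadalq_s16 pairwise-adds adjacent int16 lanes into the int32 accumulator. The second pair of hits (lines 193/196) is the kernel's remainder path, which handles a final 8-channel slice with vmull_s8 alone. A minimal sketch of the main-loop step; the helper name and loads are illustrative, not XNNPACK's API:

    #include <arm_neon.h>
    #include <stdint.h>

    /* Hypothetical helper: accumulate the dot product of two 16-byte
     * int8 slices into one int32x4_t accumulator, as in the hits above. */
    static inline int32x4_t dot16_mlal_padal(int32x4_t vacc,
                                             const int8_t a[16],
                                             const int8_t b[16]) {
      const int8x8_t va_lo = vld1_s8(a);      /* a[0..7]  */
      const int8x8_t va_hi = vld1_s8(a + 8);  /* a[8..15] */
      const int8x8_t vb_lo = vld1_s8(b);
      const int8x8_t vb_hi = vld1_s8(b + 8);
      /* int8 x int8 -> int16: a single product always fits. */
      int16x8_t vprod = vmull_s8(vb_lo, va_lo);
      /* Fold in the second eight products. Two products per int16 lane can
       * reach 2 * (-128 * -128) = 32768, one past INT16_MAX, so this idiom
       * assumes the operands never hit that corner case. */
      vprod = vmlal_s8(vprod, vb_hi, va_hi);
      /* Pairwise-add adjacent int16 lanes into the four int32 lanes. */
      return vpadalq_s16(vacc, vprod);
    }

Following the kernels' row-by-column naming, vprod2x0 is A row 2 against weight column block 0, with vacc2x0 as its running int32 accumulator.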
D | 3x4c8-xw-minmax-wasmsimd.c |
    88  const v128_t vprod2x0 = wasm_i16x8_mul(vxa2, vxb0);  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd() local
    89  vacc2x0 = wasm_i32x4_add(vacc2x0, wasm_i32x4_widen_low_i16x8(vprod2x0));  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()
    90  vacc2x0 = wasm_i32x4_add(vacc2x0, wasm_i32x4_widen_high_i16x8(vprod2x0));  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()
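The wasmsimd kernels express the same widening multiply-accumulate in three steps: wasm_i16x8_mul on sign-extended int8 inputs (a single int8 x int8 product always fits in int16), then sign-extending the low and high halves of the product to int32 and adding each half into the accumulator. Note these generated files use the older wasm_i32x4_widen_*_i16x8 intrinsic names; later wasm_simd128.h headers spell them wasm_i32x4_extend_*_i16x8. A sketch under that assumption, with an illustrative helper name:

    #include <wasm_simd128.h>
    #include <stdint.h>

    /* Hypothetical helper: accumulate eight int8 x int8 products into a
     * v128_t holding four int32 lanes. Build with clang -msimd128. */
    static inline v128_t dot8_widen_acc(v128_t vacc,
                                        const int8_t a[8],
                                        const int8_t b[8]) {
      const v128_t vxa = wasm_i16x8_load8x8(a);  /* sign-extend 8 int8 -> int16 */
      const v128_t vxb = wasm_i16x8_load8x8(b);
      const v128_t vprod = wasm_i16x8_mul(vxa, vxb);  /* exact in int16 */
      /* widen_low/widen_high in the listing == extend_low/extend_high here. */
      vacc = wasm_i32x4_add(vacc, wasm_i32x4_extend_low_i16x8(vprod));
      return wasm_i32x4_add(vacc, wasm_i32x4_extend_high_i16x8(vprod));
    }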
D | 3x4c8-minmax-wasmsimd-ld128.c |
    88  const v128_t vprod2x0 = wasm_i16x8_mul(vxb0, vxa2);  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128() local
    89  vacc2x0 = wasm_i32x4_add(vacc2x0, wasm_i32x4_widen_low_i16x8(vprod2x0));  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
    99  vacc2x0 = wasm_i32x4_add(vacc2x0, wasm_i32x4_widen_high_i16x8(vprod2x0));  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
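The ld64/ld128 suffixes describe the weight loads. In the ld64 variants both widen-adds retire back to back (lines 88-90 in the neighboring entries), while in this ld128 variant the widen_high add for vprod2x0 lands ten lines later (line 99), apparently interleaved with the other columns' products after a full 128-bit weight load is split. The swapped multiply operands (vxb0, vxa2 vs. vxa2, vxb0) are cosmetic, since the multiply is commutative.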
D | 3x4c8-minmax-wasmsimd-ld64.c |
    88  const v128_t vprod2x0 = wasm_i16x8_mul(vxa2, vxb0);  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64() local
    89  vacc2x0 = wasm_i32x4_add(vacc2x0, wasm_i32x4_widen_low_i16x8(vprod2x0));  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
    90  vacc2x0 = wasm_i32x4_add(vacc2x0, wasm_i32x4_widen_high_i16x8(vprod2x0));  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
D | 3x8c16-minmax-neon-mlal-padal.c |
    99  int16x8_t vprod2x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va2));  in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
    102  vprod2x0 = vmlal_s8(vprod2x0, vget_high_s8(vb0), vget_high_s8(va2));  in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
    105  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
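The c16 kernels consume sixteen channels per step from a single 128-bit load, splitting each operand with vget_low_s8/vget_high_s8 instead of issuing two 64-bit loads as the c8 mlal kernels do; the arithmetic is otherwise identical. A sketch with an illustrative helper name:

    #include <arm_neon.h>
    #include <stdint.h>

    /* Hypothetical helper: same MLAL+PADAL arithmetic as the sketch above,
     * but each operand comes from one 16-byte load split into halves. */
    static inline int32x4_t dot16_c16(int32x4_t vacc,
                                      const int8_t a[16],
                                      const int8_t b[16]) {
      const int8x16_t va = vld1q_s8(a);
      const int8x16_t vb = vld1q_s8(b);
      int16x8_t vprod = vmull_s8(vget_low_s8(vb), vget_low_s8(va));
      vprod = vmlal_s8(vprod, vget_high_s8(vb), vget_high_s8(va));
      return vpadalq_s16(vacc, vprod);
    }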
D | 4x8c8-minmax-neon-mlal-padal.c |
    119  int16x8_t vprod2x0 = vmull_s8(vb0x0, va2x0);  in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
    123  vprod2x0 = vmlal_s8(vprod2x0, vb0x1, va2x1);  in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
    127  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
    234  const int16x8_t vprod2x0 = vmull_s8(vb0, va2);  in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
    238  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
D | 4x8c16-minmax-neon-mlal-padal.c |
    114  int16x8_t vprod2x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va2));  in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
    118  vprod2x0 = vmlal_s8(vprod2x0, vget_high_s8(vb0), vget_high_s8(va2));  in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
    122  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal()
D | 3x16c8-minmax-neon-mlal-padal.c |
    135  int16x8_t vprod2x0 = vmull_s8(vb0x0, va2x0);  in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
    138  vprod2x0 = vmlal_s8(vprod2x0, vb0x1, va2x1);  in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
    141  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
    305  const int16x8_t vprod2x0 = vmull_s8(vb0, va2);  in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
    308  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
D | 3x16c16-minmax-neon-mlal-padal.c |
    131  int16x8_t vprod2x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va2));  in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
    134  vprod2x0 = vmlal_s8(vprod2x0, vget_high_s8(vb0), vget_high_s8(va2));  in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
    137  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
D | 4x16c8-minmax-neon-mlal-padal.c |
    159  int16x8_t vprod2x0 = vmull_s8(vb0x0, va2x0);  in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
    163  vprod2x0 = vmlal_s8(vprod2x0, vb0x1, va2x1);  in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
    167  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
    378  const int16x8_t vprod2x0 = vmull_s8(vb0, va2);  in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
    382  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
D | 3x8c8-minmax-neon-mull-padal.c |
    92  const int16x8_t vprod2x0 = vmull_s8(vb0, va2);  in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal() local
    95  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
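The neon-mull-padal kernels are the simpler variant: one vmull_s8 per eight channels, folded straight into the int32 accumulator. With a single int8 x int8 product per int16 lane there is no overflow corner case, at the cost of one vpadalq_s16 per multiply instead of one per multiply pair. Sketch with an illustrative helper name:

    #include <arm_neon.h>
    #include <stdint.h>

    /* Hypothetical helper: the MULL-only step, eight channels at a time. */
    static inline int32x4_t dot8_mull_padal(int32x4_t vacc,
                                            const int8_t a[8],
                                            const int8_t b[8]) {
      const int16x8_t vprod = vmull_s8(vld1_s8(b), vld1_s8(a));
      return vpadalq_s16(vacc, vprod);
    }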
D | 4x8c8-minmax-neon-mull-padal.c |
    107  const int16x8_t vprod2x0 = vmull_s8(vb0, va2);  in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal() local
    111  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
D | 4x16c16-minmax-neon-mlal-padal.c |
    154  int16x8_t vprod2x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va2));  in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local
    158  vprod2x0 = vmlal_s8(vprod2x0, vget_high_s8(vb0), vget_high_s8(va2));  in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
    162  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
/external/XNNPACK/src/qs8-igemm/gen/ |
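The qs8-igemm kernels below are the indirect-GEMM counterparts of the qs8-gemm kernels above: as used for convolution, the A rows arrive through an indirection buffer of row pointers (with padding taps aliased to a shared zero buffer) rather than as one dense array. The inner per-channel accumulation is unchanged, which is why each vprod2x0 pattern from the GEMM section reappears here at slightly shifted line numbers.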
D | 3x4c8-minmax-wasmsimd-ld128.c |
    103  const v128_t vprod2x0 = wasm_i16x8_mul(vxb0, vxa2);  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128() local
    104  vacc2x0 = wasm_i32x4_add(vacc2x0, wasm_i32x4_widen_low_i16x8(vprod2x0));  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
    114  vacc2x0 = wasm_i32x4_add(vacc2x0, wasm_i32x4_widen_high_i16x8(vprod2x0));  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
D | 3x4c8-minmax-wasmsimd-ld64.c |
    103  const v128_t vprod2x0 = wasm_i16x8_mul(vxa2, vxb0);  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64() local
    104  vacc2x0 = wasm_i32x4_add(vacc2x0, wasm_i32x4_widen_low_i16x8(vprod2x0));  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
    105  vacc2x0 = wasm_i32x4_add(vacc2x0, wasm_i32x4_widen_high_i16x8(vprod2x0));  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
D | 3x8c8-minmax-neon-mlal-padal.c |
    118  int16x8_t vprod2x0 = vmull_s8(vb0x0, va2x0);  in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
    121  vprod2x0 = vmlal_s8(vprod2x0, vb0x1, va2x1);  in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
    124  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
    208  const int16x8_t vprod2x0 = vmull_s8(vb0, va2);  in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
    211  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
D | 3x8c16-minmax-neon-mlal-padal.c |
    114  int16x8_t vprod2x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va2));  in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
    117  vprod2x0 = vmlal_s8(vprod2x0, vget_high_s8(vb0), vget_high_s8(va2));  in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
    120  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
D | 4x8c8-minmax-neon-mlal-padal.c |
    136  int16x8_t vprod2x0 = vmull_s8(vb0x0, va2x0);  in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
    140  vprod2x0 = vmlal_s8(vprod2x0, vb0x1, va2x1);  in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
    144  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
    251  const int16x8_t vprod2x0 = vmull_s8(vb0, va2);  in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
    255  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal()
D | 4x8c16-minmax-neon-mlal-padal.c |
    131  int16x8_t vprod2x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va2));  in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() local
    135  vprod2x0 = vmlal_s8(vprod2x0, vget_high_s8(vb0), vget_high_s8(va2));  in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
    139  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
D | 3x16c8-minmax-neon-mlal-padal.c |
    150  int16x8_t vprod2x0 = vmull_s8(vb0x0, va2x0);  in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
    153  vprod2x0 = vmlal_s8(vprod2x0, vb0x1, va2x1);  in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
    156  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
    320  const int16x8_t vprod2x0 = vmull_s8(vb0, va2);  in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local
    323  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
D | 3x16c16-minmax-neon-mlal-padal.c |
    146  int16x8_t vprod2x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va2));  in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local
    149  vprod2x0 = vmlal_s8(vprod2x0, vget_high_s8(vb0), vget_high_s8(va2));  in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
    152  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
D | 4x16c8-minmax-neon-mlal-padal.c |
    176  int16x8_t vprod2x0 = vmull_s8(vb0x0, va2x0);  in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
    180  vprod2x0 = vmlal_s8(vprod2x0, vb0x1, va2x1);  in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
    184  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
    395  const int16x8_t vprod2x0 = vmull_s8(vb0, va2);  in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local
    399  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
D | 3x8c8-minmax-neon-mull-padal.c |
    107  const int16x8_t vprod2x0 = vmull_s8(vb0, va2);  in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal() local
    110  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
D | 4x8c8-minmax-neon-mull-padal.c |
    124  const int16x8_t vprod2x0 = vmull_s8(vb0, va2);  in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal() local
    128  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
D | 4x16c16-minmax-neon-mlal-padal.c |
    171  int16x8_t vprod2x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va2));  in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local
    175  vprod2x0 = vmlal_s8(vprod2x0, vget_high_s8(vb0), vget_high_s8(va2));  in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
    179  vacc2x0 = vpadalq_s16(vacc2x0, vprod2x0);  in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()