/external/XNNPACK/src/qs8-gemm/gen/ |
D | 1x8c8-minmax-neon-mlal-padal.c |
    69 int16x8_t vprod0x0 = vmull_s8(vb0x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal() local
    70 vprod0x0 = vmlal_s8(vprod0x0, vb0x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal()
    71 vacc0x0 = vpadalq_s16(vacc0x0, vprod0x0); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal()
    109 const int16x8_t vprod0x0 = vmull_s8(vb0, va0); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal() local
    110 vacc0x0 = vpadalq_s16(vacc0x0, vprod0x0); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal()
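The pattern these neon-mlal-padal hits repeat is a widening int8 dot-product step: vmull_s8 forms 16-bit products, vmlal_s8 folds a second 8-byte strip into the same 16-bit vector, and vpadalq_s16 pairwise-adds the 16-bit lanes into 32-bit accumulators. A minimal standalone sketch of that step, with illustrative names (acc, a0/a1, b0/b1 are not XNNPACK's):

    #include <arm_neon.h>

    // Accumulate lane-wise a0*b0 + a1*b1 for two 8-byte int8 strips into
    // four int32 accumulators, as in the mlal-padal kernels above.
    static inline int32x4_t acc_dot_c8x2(int32x4_t acc,
                                         int8x8_t a0, int8x8_t a1,
                                         int8x8_t b0, int8x8_t b1) {
      // Widening multiply: int8 x int8 -> int16, so the products are exact.
      int16x8_t prod = vmull_s8(b0, a0);
      // Widening multiply-accumulate of the second strip. The sum of two
      // int8 products fits in int16 provided one operand avoids -128
      // (e.g. weights kept in [-127, 127], the usual symmetric-quantization
      // convention).
      prod = vmlal_s8(prod, b1, a1);
      // Pairwise add adjacent int16 lanes, widening into the int32 lanes.
      return vpadalq_s16(acc, prod);
    }

The second cluster of hits in each of these files (lines 109-110 here) is the remainder path for a single leftover 8-byte strip: one vmull_s8 feeding vpadalq_s16 directly, with no vmlal_s8.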
|
D | 1x4c8-minmax-wasmsimd-ld64.c |
    58 const v128_t vprod0x0 = wasm_i16x8_mul(vxa0, vxb0); in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64() local
    59 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0)); in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64()
    60 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0)); in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64()
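The wasmsimd hits, here and in the ld128/xw variants below, express the same widen-then-accumulate step without a pairwise-add instruction: a 16-bit multiply of sign-extended inputs, then the low and high halves of the product vector are widened to 32 bits and added into the accumulator separately. A sketch assuming the current <wasm_simd128.h> intrinsics; this snapshot predates the rename of wasm_i32x4_widen_{low,high}_i16x8 to wasm_i32x4_extend_{low,high}_i16x8:

    #include <wasm_simd128.h>

    // xa and xb each hold eight int16 lanes sign-extended from an 8-byte
    // int8 strip (the kernels widen while loading).
    static inline v128_t acc_dot_c8(v128_t acc, v128_t xa, v128_t xb) {
      // int16 x int16 -> int16 multiply, exact here: int8 products span
      // at most [-16256, 16384], which fits in 16 bits.
      const v128_t prod = wasm_i16x8_mul(xa, xb);
      // Sign-extend the low and high four products to int32 and accumulate.
      acc = wasm_i32x4_add(acc, wasm_i32x4_extend_low_i16x8(prod));
      acc = wasm_i32x4_add(acc, wasm_i32x4_extend_high_i16x8(prod));
      return acc;
    }

The ld64 and ld128 variants swap the multiply's operand order (vxa0, vxb0 versus vxb0, vxa0); since the multiply is commutative, only their load strategy differs.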
|
D | 1x4c8-minmax-wasmsimd-ld128.c |
    60 const v128_t vprod0x0 = wasm_i16x8_mul(vxb0, vxa0); in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128() local
    61 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0)); in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
    65 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0)); in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
|
D | 1x4c8-xw-minmax-wasmsimd.c |
    58 const v128_t vprod0x0 = wasm_i16x8_mul(vxa0, vxb0); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd() local
    59 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0)); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd()
    60 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0)); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd()
|
D | 1x8c16-minmax-neon-mlal-padal.c |
    67 int16x8_t vprod0x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal() local
    68 vprod0x0 = vmlal_s8(vprod0x0, vget_high_s8(vb0), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal()
    69 vacc0x0 = vpadalq_s16(vacc0x0, vprod0x0); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal()
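The c16 kernels consume a full 16-byte strip per channel block in one load and split it with vget_low_s8/vget_high_s8 instead of loading two 8-byte halves; the arithmetic is otherwise the same chain. A sketch with illustrative names:

    #include <arm_neon.h>

    // Same mull/mlal/padal chain as above, fed from single 16-byte loads.
    static inline int32x4_t acc_dot_c16(int32x4_t acc, int8x16_t a, int8x16_t b) {
      int16x8_t prod = vmull_s8(vget_low_s8(b), vget_low_s8(a));
      prod = vmlal_s8(prod, vget_high_s8(b), vget_high_s8(a));
      return vpadalq_s16(acc, prod);
    }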
|
D | 2x8c8-minmax-neon-mlal-padal.c |
    85 int16x8_t vprod0x0 = vmull_s8(vb0x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
    87 vprod0x0 = vmlal_s8(vprod0x0, vb0x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
    89 vacc0x0 = vpadalq_s16(vacc0x0, vprod0x0); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
    150 const int16x8_t vprod0x0 = vmull_s8(vb0, va0); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
    152 vacc0x0 = vpadalq_s16(vacc0x0, vprod0x0); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
|
D | 2x4c8-xw-minmax-wasmsimd.c |
    70 const v128_t vprod0x0 = wasm_i16x8_mul(vxa0, vxb0); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd() local
    71 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0)); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd()
    72 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0)); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd()
|
D | 2x4c8-minmax-wasmsimd-ld128.c |
    72 const v128_t vprod0x0 = wasm_i16x8_mul(vxb0, vxa0); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128() local
    73 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
    79 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
|
D | 2x4c8-minmax-wasmsimd-ld64.c |
    70 const v128_t vprod0x0 = wasm_i16x8_mul(vxa0, vxb0); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64() local
    71 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
    72 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
|
D | 1x16c8-minmax-neon-mlal-padal.c |
    85 int16x8_t vprod0x0 = vmull_s8(vb0x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
    86 vprod0x0 = vmlal_s8(vprod0x0, vb0x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
    87 vacc0x0 = vpadalq_s16(vacc0x0, vprod0x0); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
    157 const int16x8_t vprod0x0 = vmull_s8(vb0, va0); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
    158 vacc0x0 = vpadalq_s16(vacc0x0, vprod0x0); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
|
D | 3x8c8-minmax-neon-mlal-padal.c |
    101 int16x8_t vprod0x0 = vmull_s8(vb0x0, va0x0); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
    104 vprod0x0 = vmlal_s8(vprod0x0, vb0x1, va0x1); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
    107 vacc0x0 = vpadalq_s16(vacc0x0, vprod0x0); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
    191 const int16x8_t vprod0x0 = vmull_s8(vb0, va0); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
    194 vacc0x0 = vpadalq_s16(vacc0x0, vprod0x0); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
|
D | 2x8c16-minmax-neon-mlal-padal.c |
    82 int16x8_t vprod0x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal() local
    84 vprod0x0 = vmlal_s8(vprod0x0, vget_high_s8(vb0), vget_high_s8(va0)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
    86 vacc0x0 = vpadalq_s16(vacc0x0, vprod0x0); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
|
D | 3x4c8-xw-minmax-wasmsimd.c |
    82 const v128_t vprod0x0 = wasm_i16x8_mul(vxa0, vxb0); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd() local
    83 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0)); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()
    84 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0)); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()
|
D | 3x4c8-minmax-wasmsimd-ld128.c |
    84 const v128_t vprod0x0 = wasm_i16x8_mul(vxb0, vxa0); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128() local
    85 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0)); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
    93 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0)); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 1x8c8-minmax-neon-mlal-padal.c |
    80 int16x8_t vprod0x0 = vmull_s8(vb0x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal() local
    81 vprod0x0 = vmlal_s8(vprod0x0, vb0x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal()
    82 vacc0x0 = vpadalq_s16(vacc0x0, vprod0x0); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal()
    120 const int16x8_t vprod0x0 = vmull_s8(vb0, va0); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal() local
    121 vacc0x0 = vpadalq_s16(vacc0x0, vprod0x0); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal()
|
D | 1x4c8-minmax-wasmsimd-ld64.c |
    69 const v128_t vprod0x0 = wasm_i16x8_mul(vxa0, vxb0); in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64() local
    70 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0)); in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64()
    71 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0)); in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64()
|
D | 1x4c8-minmax-wasmsimd-ld128.c |
    71 const v128_t vprod0x0 = wasm_i16x8_mul(vxb0, vxa0); in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128() local
    72 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0)); in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
    76 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0)); in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
|
D | 1x8c16-minmax-neon-mlal-padal.c |
    78 int16x8_t vprod0x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal() local
    79 vprod0x0 = vmlal_s8(vprod0x0, vget_high_s8(vb0), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal()
    80 vacc0x0 = vpadalq_s16(vacc0x0, vprod0x0); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal()
|
D | 2x8c8-minmax-neon-mlal-padal.c |
    98 int16x8_t vprod0x0 = vmull_s8(vb0x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
    100 vprod0x0 = vmlal_s8(vprod0x0, vb0x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
    102 vacc0x0 = vpadalq_s16(vacc0x0, vprod0x0); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
    163 const int16x8_t vprod0x0 = vmull_s8(vb0, va0); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
    165 vacc0x0 = vpadalq_s16(vacc0x0, vprod0x0); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
|
D | 2x4c8-minmax-wasmsimd-ld128.c |
    85 const v128_t vprod0x0 = wasm_i16x8_mul(vxb0, vxa0); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128() local
    86 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
    92 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
|
D | 2x4c8-minmax-wasmsimd-ld64.c |
    83 const v128_t vprod0x0 = wasm_i16x8_mul(vxa0, vxb0); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64() local
    84 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
    85 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
|
D | 1x16c8-minmax-neon-mlal-padal.c |
    96 int16x8_t vprod0x0 = vmull_s8(vb0x0, va0x0); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
    97 vprod0x0 = vmlal_s8(vprod0x0, vb0x1, va0x1); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
    98 vacc0x0 = vpadalq_s16(vacc0x0, vprod0x0); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
    168 const int16x8_t vprod0x0 = vmull_s8(vb0, va0); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
    169 vacc0x0 = vpadalq_s16(vacc0x0, vprod0x0); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
|
D | 2x8c16-minmax-neon-mlal-padal.c |
    95 int16x8_t vprod0x0 = vmull_s8(vget_low_s8(vb0), vget_low_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal() local
    97 vprod0x0 = vmlal_s8(vprod0x0, vget_high_s8(vb0), vget_high_s8(va0)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
    99 vacc0x0 = vpadalq_s16(vacc0x0, vprod0x0); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
|
D | 3x4c8-minmax-wasmsimd-ld128.c |
    99 const v128_t vprod0x0 = wasm_i16x8_mul(vxb0, vxa0); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128() local
    100 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
    108 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
|
D | 3x4c8-minmax-wasmsimd-ld64.c |
    97 const v128_t vprod0x0 = wasm_i16x8_mul(vxa0, vxb0); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64() local
    98 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
    99 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
|