/external/XNNPACK/src/qs8-gemm/gen/
D | 1x8c8-minmax-neon-mlal-padal.c | xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal()
     81  int16x8_t vprod0x3 = vmull_s8(vb3x0, va0x0);
     82  vprod0x3 = vmlal_s8(vprod0x3, vb3x1, va0x1);
     83  vacc0x3 = vpadalq_s16(vacc0x3, vprod0x3);
    118  const int16x8_t vprod0x3 = vmull_s8(vb3, va0);
    119  vacc0x3 = vpadalq_s16(vacc0x3, vprod0x3);
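The neon-mlal-padal c8 entries in this listing all match one widening multiply-accumulate step: vmull_s8 widens eight s8*s8 products to 16 bits, vmlal_s8 folds in a second 8-byte slice at 16-bit width, and vpadalq_s16 pairwise-adds the 16-bit products into the 32-bit accumulator. Below is a minimal standalone sketch of that step; the helper name and parameter names are illustrative (acc, a0/a1, b0/b1 stand in for vacc0x3, va0x0/va0x1, vb3x0/vb3x1) and are not part of XNNPACK.

    #include <arm_neon.h>

    // One c8 MLAL+PADAL step: two 8-byte int8 slices are multiplied and
    // summed at 16-bit width, then folded pairwise into the s32 accumulator.
    // Illustrative helper, not an XNNPACK function.
    static inline int32x4_t qs8_mlal_padal_step(
        int32x4_t acc,             // running accumulator (vacc0x3 above)
        int8x8_t a0, int8x8_t a1,  // two activation slices (va0x0, va0x1)
        int8x8_t b0, int8x8_t b1)  // matching weight slices (vb3x0, vb3x1)
    {
      int16x8_t prod = vmull_s8(b0, a0);  // widen: 8 x (s8*s8) -> s16
      prod = vmlal_s8(prod, b1, a1);      // second slice, still 16-bit
      return vpadalq_s16(acc, prod);      // pairwise add s16 pairs into s32
    }

The second group of matches in each of these files (e.g. file lines 118-119 above) is the remainder path, which has only one slice left: a lone vmull_s8 followed directly by vpadalq_s16.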
D | 1x4c8-minmax-wasmsimd-ld64.c | xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64()
     73  const v128_t vprod0x3 = wasm_i16x8_mul(vxa0, vxb3);
     74  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_low_i16x8(vprod0x3));
     75  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_high_i16x8(vprod0x3));
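The wasmsimd entries use the same widen-then-accumulate idea with 128-bit WebAssembly SIMD. Here the int8 inputs have already been sign-extended to 16-bit lanes (vxa0, vxb3), so the step is one lane-wise 16-bit multiply, followed by widening each half of the product vector to 32 bits and adding both halves into the accumulator. A minimal sketch under the same illustrative-naming caveat; note that newer wasm_simd128.h headers spell the widening intrinsics wasm_i32x4_extend_low_i16x8/wasm_i32x4_extend_high_i16x8 rather than widen_*:

    #include <wasm_simd128.h>

    // One c8 widening MAC step: vxa and vxb hold eight sign-extended 16-bit
    // lanes each; the eight s16 products are widened to s32 in two halves
    // and both halves are added into the 4-lane accumulator.
    // Illustrative helper, not an XNNPACK function.
    static inline v128_t qs8_widen_mac_step(v128_t acc, v128_t vxa, v128_t vxb) {
      const v128_t prod = wasm_i16x8_mul(vxa, vxb);                  // 8 x s16
      acc = wasm_i32x4_add(acc, wasm_i32x4_widen_low_i16x8(prod));   // lanes 0-3
      acc = wasm_i32x4_add(acc, wasm_i32x4_widen_high_i16x8(prod));  // lanes 4-7
      return acc;
    }

The ld64, ld128, and xw variants below differ only in how vxa0/vxb3 are loaded and sign-extended, not in this accumulation step; the operand order of wasm_i16x8_mul also flips between variants, which is harmless since the multiply is commutative.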
D | 1x4c8-minmax-wasmsimd-ld128.c | xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
     75  const v128_t vprod0x3 = wasm_i16x8_mul(vxb3, vxa0);
     76  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_low_i16x8(vprod0x3));
     79  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_high_i16x8(vprod0x3));
D | 1x4c8-xw-minmax-wasmsimd.c | xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd()
     73  const v128_t vprod0x3 = wasm_i16x8_mul(vxa0, vxb3);
     74  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_low_i16x8(vprod0x3));
     75  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_high_i16x8(vprod0x3));
D | 1x8c16-minmax-neon-mlal-padal.c | xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal()
     76  int16x8_t vprod0x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va0));
     77  vprod0x3 = vmlal_s8(vprod0x3, vget_high_s8(vb3), vget_high_s8(va0));
     78  vacc0x3 = vpadalq_s16(vacc0x3, vprod0x3);
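The c16 variants reach the same MLAL+PADAL step from a different load shape: each input is a single 16-byte vector that is split in half with vget_low_s8/vget_high_s8, instead of two separate 8-byte loads. A sketch under the same illustrative-naming assumption:

    #include <arm_neon.h>

    // One c16 step: split each 16-byte input into 8-byte halves, multiply
    // and sum them at 16-bit width, then fold into the s32 accumulator.
    // Illustrative helper, not an XNNPACK function.
    static inline int32x4_t qs8_c16_step(int32x4_t acc, int8x16_t va, int8x16_t vb) {
      int16x8_t prod = vmull_s8(vget_low_s8(vb), vget_low_s8(va));
      prod = vmlal_s8(prod, vget_high_s8(vb), vget_high_s8(va));
      return vpadalq_s16(acc, prod);
    }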
D | 2x8c8-minmax-neon-mlal-padal.c | xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
    106  int16x8_t vprod0x3 = vmull_s8(vb3x0, va0x0);
    108  vprod0x3 = vmlal_s8(vprod0x3, vb3x1, va0x1);
    110  vacc0x3 = vpadalq_s16(vacc0x3, vprod0x3);
    165  const int16x8_t vprod0x3 = vmull_s8(vb3, va0);
    167  vacc0x3 = vpadalq_s16(vacc0x3, vprod0x3);
D | 2x4c8-xw-minmax-wasmsimd.c | xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd()
     94  const v128_t vprod0x3 = wasm_i16x8_mul(vxa0, vxb3);
     95  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_low_i16x8(vprod0x3));
     96  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_high_i16x8(vprod0x3));
D | 2x4c8-minmax-wasmsimd-ld128.c | xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
     95  const v128_t vprod0x3 = wasm_i16x8_mul(vxb3, vxa0);
     96  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_low_i16x8(vprod0x3));
    102  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_high_i16x8(vprod0x3));
D | 2x4c8-minmax-wasmsimd-ld64.c | xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
     94  const v128_t vprod0x3 = wasm_i16x8_mul(vxa0, vxb3);
     95  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_low_i16x8(vprod0x3));
     96  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_high_i16x8(vprod0x3));
D | 1x16c8-minmax-neon-mlal-padal.c | xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
     97  int16x8_t vprod0x3 = vmull_s8(vb3x0, va0x0);
     98  vprod0x3 = vmlal_s8(vprod0x3, vb3x1, va0x1);
     99  vacc0x3 = vpadalq_s16(vacc0x3, vprod0x3);
    166  const int16x8_t vprod0x3 = vmull_s8(vb3, va0);
    167  vacc0x3 = vpadalq_s16(vacc0x3, vprod0x3);
D | 3x8c8-minmax-neon-mlal-padal.c | xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
    131  int16x8_t vprod0x3 = vmull_s8(vb3x0, va0x0);
    134  vprod0x3 = vmlal_s8(vprod0x3, vb3x1, va0x1);
    137  vacc0x3 = vpadalq_s16(vacc0x3, vprod0x3);
    212  const int16x8_t vprod0x3 = vmull_s8(vb3, va0);
    215  vacc0x3 = vpadalq_s16(vacc0x3, vprod0x3);
D | 2x8c16-minmax-neon-mlal-padal.c | xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
    100  int16x8_t vprod0x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va0));
    102  vprod0x3 = vmlal_s8(vprod0x3, vget_high_s8(vb3), vget_high_s8(va0));
    104  vacc0x3 = vpadalq_s16(vacc0x3, vprod0x3);
D | 3x4c8-xw-minmax-wasmsimd.c | xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()
    115  const v128_t vprod0x3 = wasm_i16x8_mul(vxa0, vxb3);
    116  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_low_i16x8(vprod0x3));
    117  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_high_i16x8(vprod0x3));
D | 3x4c8-minmax-wasmsimd-ld128.c | xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
    115  const v128_t vprod0x3 = wasm_i16x8_mul(vxb3, vxa0);
    116  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_low_i16x8(vprod0x3));
    125  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_high_i16x8(vprod0x3));
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 1x8c8-minmax-neon-mlal-padal.c | xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal()
     92  int16x8_t vprod0x3 = vmull_s8(vb3x0, va0x0);
     93  vprod0x3 = vmlal_s8(vprod0x3, vb3x1, va0x1);
     94  vacc0x3 = vpadalq_s16(vacc0x3, vprod0x3);
    129  const int16x8_t vprod0x3 = vmull_s8(vb3, va0);
    130  vacc0x3 = vpadalq_s16(vacc0x3, vprod0x3);
D | 1x4c8-minmax-wasmsimd-ld64.c | xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64()
     84  const v128_t vprod0x3 = wasm_i16x8_mul(vxa0, vxb3);
     85  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_low_i16x8(vprod0x3));
     86  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_high_i16x8(vprod0x3));
D | 1x4c8-minmax-wasmsimd-ld128.c | xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
     86  const v128_t vprod0x3 = wasm_i16x8_mul(vxb3, vxa0);
     87  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_low_i16x8(vprod0x3));
     90  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_high_i16x8(vprod0x3));
D | 1x8c16-minmax-neon-mlal-padal.c | xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal()
     87  int16x8_t vprod0x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va0));
     88  vprod0x3 = vmlal_s8(vprod0x3, vget_high_s8(vb3), vget_high_s8(va0));
     89  vacc0x3 = vpadalq_s16(vacc0x3, vprod0x3);
D | 2x8c8-minmax-neon-mlal-padal.c | xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
    119  int16x8_t vprod0x3 = vmull_s8(vb3x0, va0x0);
    121  vprod0x3 = vmlal_s8(vprod0x3, vb3x1, va0x1);
    123  vacc0x3 = vpadalq_s16(vacc0x3, vprod0x3);
    178  const int16x8_t vprod0x3 = vmull_s8(vb3, va0);
    180  vacc0x3 = vpadalq_s16(vacc0x3, vprod0x3);
D | 2x4c8-minmax-wasmsimd-ld128.c | xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
    108  const v128_t vprod0x3 = wasm_i16x8_mul(vxb3, vxa0);
    109  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_low_i16x8(vprod0x3));
    115  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_high_i16x8(vprod0x3));
D | 2x4c8-minmax-wasmsimd-ld64.c | xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
    107  const v128_t vprod0x3 = wasm_i16x8_mul(vxa0, vxb3);
    108  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_low_i16x8(vprod0x3));
    109  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_high_i16x8(vprod0x3));
D | 1x16c8-minmax-neon-mlal-padal.c | xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
    108  int16x8_t vprod0x3 = vmull_s8(vb3x0, va0x0);
    109  vprod0x3 = vmlal_s8(vprod0x3, vb3x1, va0x1);
    110  vacc0x3 = vpadalq_s16(vacc0x3, vprod0x3);
    177  const int16x8_t vprod0x3 = vmull_s8(vb3, va0);
    178  vacc0x3 = vpadalq_s16(vacc0x3, vprod0x3);
D | 2x8c16-minmax-neon-mlal-padal.c | xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
    113  int16x8_t vprod0x3 = vmull_s8(vget_low_s8(vb3), vget_low_s8(va0));
    115  vprod0x3 = vmlal_s8(vprod0x3, vget_high_s8(vb3), vget_high_s8(va0));
    117  vacc0x3 = vpadalq_s16(vacc0x3, vprod0x3);
D | 3x4c8-minmax-wasmsimd-ld128.c | xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
    130  const v128_t vprod0x3 = wasm_i16x8_mul(vxb3, vxa0);
    131  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_low_i16x8(vprod0x3));
    140  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_high_i16x8(vprod0x3));
D | 3x4c8-minmax-wasmsimd-ld64.c | xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
    130  const v128_t vprod0x3 = wasm_i16x8_mul(vxa0, vxb3);
    131  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_low_i16x8(vprod0x3));
    132  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_high_i16x8(vprod0x3));