/external/XNNPACK/src/qs8-gemm/gen/ |
D | 1x8c8-minmax-neon-mlal-padal.c |
     77  int16x8_t vprod0x2 = vmull_s8(vb2x0, va0x0);  in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal() local
     78  vprod0x2 = vmlal_s8(vprod0x2, vb2x1, va0x1);  in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal()
     79  vacc0x2 = vpadalq_s16(vacc0x2, vprod0x2);  in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal()
    115  const int16x8_t vprod0x2 = vmull_s8(vb2, va0);  in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal() local
    116  vacc0x2 = vpadalq_s16(vacc0x2, vprod0x2);  in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal()
|
D | 1x4c8-minmax-wasmsimd-ld64.c |
     68  const v128_t vprod0x2 = wasm_i16x8_mul(vxa0, vxb2);  in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64() local
     69  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2));  in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64()
     70  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2));  in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64()
|
D | 1x4c8-minmax-wasmsimd-ld128.c |
     72  const v128_t vprod0x2 = wasm_i16x8_mul(vxb2, vxa0);  in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128() local
     73  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2));  in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
     77  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2));  in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
|
D | 1x4c8-xw-minmax-wasmsimd.c |
     68  const v128_t vprod0x2 = wasm_i16x8_mul(vxa0, vxb2);  in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd() local
     69  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2));  in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd()
     70  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2));  in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd()
|
D | 1x8c16-minmax-neon-mlal-padal.c |
     73  int16x8_t vprod0x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va0));  in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal() local
     74  vprod0x2 = vmlal_s8(vprod0x2, vget_high_s8(vb2), vget_high_s8(va0));  in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal()
     75  vacc0x2 = vpadalq_s16(vacc0x2, vprod0x2);  in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal()
|
D | 2x8c8-minmax-neon-mlal-padal.c |
     99  int16x8_t vprod0x2 = vmull_s8(vb2x0, va0x0);  in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
    101  vprod0x2 = vmlal_s8(vprod0x2, vb2x1, va0x1);  in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
    103  vacc0x2 = vpadalq_s16(vacc0x2, vprod0x2);  in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
    160  const int16x8_t vprod0x2 = vmull_s8(vb2, va0);  in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
    162  vacc0x2 = vpadalq_s16(vacc0x2, vprod0x2);  in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
|
D | 2x4c8-xw-minmax-wasmsimd.c |
     86  const v128_t vprod0x2 = wasm_i16x8_mul(vxa0, vxb2);  in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd() local
     87  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2));  in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd()
     88  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2));  in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd()
|
D | 2x4c8-minmax-wasmsimd-ld128.c |
     90  const v128_t vprod0x2 = wasm_i16x8_mul(vxb2, vxa0);  in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128() local
     91  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2));  in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
     97  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2));  in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
|
D | 2x4c8-minmax-wasmsimd-ld64.c |
     86  const v128_t vprod0x2 = wasm_i16x8_mul(vxa0, vxb2);  in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64() local
     87  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2));  in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
     88  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2));  in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
|
D | 1x16c8-minmax-neon-mlal-padal.c |
     93  int16x8_t vprod0x2 = vmull_s8(vb2x0, va0x0);  in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
     94  vprod0x2 = vmlal_s8(vprod0x2, vb2x1, va0x1);  in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
     95  vacc0x2 = vpadalq_s16(vacc0x2, vprod0x2);  in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
    163  const int16x8_t vprod0x2 = vmull_s8(vb2, va0);  in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
    164  vacc0x2 = vpadalq_s16(vacc0x2, vprod0x2);  in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
|
D | 3x8c8-minmax-neon-mlal-padal.c |
    121  int16x8_t vprod0x2 = vmull_s8(vb2x0, va0x0);  in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
    124  vprod0x2 = vmlal_s8(vprod0x2, vb2x1, va0x1);  in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
    127  vacc0x2 = vpadalq_s16(vacc0x2, vprod0x2);  in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
    205  const int16x8_t vprod0x2 = vmull_s8(vb2, va0);  in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
    208  vacc0x2 = vpadalq_s16(vacc0x2, vprod0x2);  in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
|
D | 2x8c16-minmax-neon-mlal-padal.c |
     94  int16x8_t vprod0x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va0));  in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal() local
     96  vprod0x2 = vmlal_s8(vprod0x2, vget_high_s8(vb2), vget_high_s8(va0));  in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
     98  vacc0x2 = vpadalq_s16(vacc0x2, vprod0x2);  in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
|
D | 3x4c8-xw-minmax-wasmsimd.c |
    104  const v128_t vprod0x2 = wasm_i16x8_mul(vxa0, vxb2);  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd() local
    105  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2));  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()
    106  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2));  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()
|
D | 3x4c8-minmax-wasmsimd-ld128.c |
    108  const v128_t vprod0x2 = wasm_i16x8_mul(vxb2, vxa0);  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128() local
    109  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2));  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
    117  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2));  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
|
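Every *neon-mlal-padal hit above repeats the same per-column idiom: a widening int8 multiply of the low eight lanes (vmull_s8), a widening multiply-accumulate of the high eight lanes (vmlal_s8), and a pairwise add-accumulate of the int16 products into the int32 accumulator (vpadalq_s16). The following is a minimal, self-contained C sketch of that step; the helper name and signature are illustrative only, not XNNPACK's API, and it mirrors the c16 form where a single 16-byte load covers both halves (the c8 kernels instead feed two separate 8-byte halves, e.g. va0x0/va0x1).

#include <arm_neon.h>

/* Illustrative sketch, not an XNNPACK kernel: accumulate one output column.
 * `va` holds 16 signed int8 activations, `vb` 16 signed int8 weights,
 * `vacc` the running int32 accumulator for that column. */
static inline int32x4_t qs8_accumulate_column(int32x4_t vacc, int8x16_t va, int8x16_t vb) {
  /* Widening multiply of the low 8 lanes: int8 x int8 -> int16. */
  int16x8_t vprod = vmull_s8(vget_low_s8(vb), vget_low_s8(va));
  /* Widening multiply-accumulate of the high 8 lanes into the same int16 vector. */
  vprod = vmlal_s8(vprod, vget_high_s8(vb), vget_high_s8(va));
  /* Pairwise add adjacent int16 products and accumulate into 4 int32 lanes. */
  return vpadalq_s16(vacc, vprod);
}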
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 1x8c8-minmax-neon-mlal-padal.c |
     88  int16x8_t vprod0x2 = vmull_s8(vb2x0, va0x0);  in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal() local
     89  vprod0x2 = vmlal_s8(vprod0x2, vb2x1, va0x1);  in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal()
     90  vacc0x2 = vpadalq_s16(vacc0x2, vprod0x2);  in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal()
    126  const int16x8_t vprod0x2 = vmull_s8(vb2, va0);  in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal() local
    127  vacc0x2 = vpadalq_s16(vacc0x2, vprod0x2);  in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal()
|
D | 1x4c8-minmax-wasmsimd-ld64.c |
     79  const v128_t vprod0x2 = wasm_i16x8_mul(vxa0, vxb2);  in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64() local
     80  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2));  in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64()
     81  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2));  in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64()
|
D | 1x4c8-minmax-wasmsimd-ld128.c |
     83  const v128_t vprod0x2 = wasm_i16x8_mul(vxb2, vxa0);  in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128() local
     84  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2));  in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
     88  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2));  in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
|
D | 1x8c16-minmax-neon-mlal-padal.c |
     84  int16x8_t vprod0x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va0));  in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal() local
     85  vprod0x2 = vmlal_s8(vprod0x2, vget_high_s8(vb2), vget_high_s8(va0));  in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal()
     86  vacc0x2 = vpadalq_s16(vacc0x2, vprod0x2);  in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal()
|
D | 2x8c8-minmax-neon-mlal-padal.c |
    112  int16x8_t vprod0x2 = vmull_s8(vb2x0, va0x0);  in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
    114  vprod0x2 = vmlal_s8(vprod0x2, vb2x1, va0x1);  in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
    116  vacc0x2 = vpadalq_s16(vacc0x2, vprod0x2);  in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
    173  const int16x8_t vprod0x2 = vmull_s8(vb2, va0);  in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
    175  vacc0x2 = vpadalq_s16(vacc0x2, vprod0x2);  in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
|
D | 2x4c8-minmax-wasmsimd-ld128.c |
    103  const v128_t vprod0x2 = wasm_i16x8_mul(vxb2, vxa0);  in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128() local
    104  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2));  in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
    110  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2));  in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
|
D | 2x4c8-minmax-wasmsimd-ld64.c |
     99  const v128_t vprod0x2 = wasm_i16x8_mul(vxa0, vxb2);  in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64() local
    100  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2));  in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
    101  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2));  in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
|
D | 1x16c8-minmax-neon-mlal-padal.c |
    104  int16x8_t vprod0x2 = vmull_s8(vb2x0, va0x0);  in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
    105  vprod0x2 = vmlal_s8(vprod0x2, vb2x1, va0x1);  in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
    106  vacc0x2 = vpadalq_s16(vacc0x2, vprod0x2);  in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
    174  const int16x8_t vprod0x2 = vmull_s8(vb2, va0);  in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
    175  vacc0x2 = vpadalq_s16(vacc0x2, vprod0x2);  in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
|
D | 2x8c16-minmax-neon-mlal-padal.c |
    107  int16x8_t vprod0x2 = vmull_s8(vget_low_s8(vb2), vget_low_s8(va0));  in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal() local
    109  vprod0x2 = vmlal_s8(vprod0x2, vget_high_s8(vb2), vget_high_s8(va0));  in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
    111  vacc0x2 = vpadalq_s16(vacc0x2, vprod0x2);  in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
|
D | 3x4c8-minmax-wasmsimd-ld128.c |
    123  const v128_t vprod0x2 = wasm_i16x8_mul(vxb2, vxa0);  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128() local
    124  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2));  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
    132  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2));  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
|
D | 3x4c8-minmax-wasmsimd-ld64.c |
    119  const v128_t vprod0x2 = wasm_i16x8_mul(vxa0, vxb2);  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64() local
    120  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2));  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
    121  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2));  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
|
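The wasmsimd hits in both directories use the analogous widen-and-add step: the int16 lanes of vxa0/vxb2 already hold sign-extended int8 inputs, their lanewise product is widened to int32 in low and high halves, and both halves are added into the accumulator. Below is a minimal, self-contained C sketch of that step; the helper name is illustrative, not XNNPACK's API. Note that current wasm_simd128.h headers spell the widening intrinsics *_extend_*, while the generated files above use the older *_widen_* names for the same operation.

#include <wasm_simd128.h>

/* Illustrative sketch, not an XNNPACK kernel: accumulate one output column.
 * vxa/vxb hold 8 sign-extended int8 values each as int16 lanes;
 * vacc is the running int32x4 accumulator. */
static inline v128_t qs8_accumulate_column_wasm(v128_t vacc, v128_t vxa, v128_t vxb) {
  const v128_t vprod = wasm_i16x8_mul(vxa, vxb);                     /* 8 int16 products */
  vacc = wasm_i32x4_add(vacc, wasm_i32x4_extend_low_i16x8(vprod));   /* widen lanes 0..3 and add */
  vacc = wasm_i32x4_add(vacc, wasm_i32x4_extend_high_i16x8(vprod));  /* widen lanes 4..7 and add */
  return vacc;
}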