/external/XNNPACK/src/qs8-dwconv/gen/
D | up24x9-minmax-wasmsimd-mul16.c | all matches in xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16():
    103  vacc0123 = wasm_i32x4_add(vacc0123, wasm_i32x4_widen_low_i16x8(vprod0x01234567));
    104  vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_widen_high_i16x8(vprod0x01234567));
    105  vacc89AB = wasm_i32x4_add(vacc89AB, wasm_i32x4_widen_low_i16x8(vprod0x89ABCDEF));
    106  vaccCDEF = wasm_i32x4_add(vaccCDEF, wasm_i32x4_widen_high_i16x8(vprod0x89ABCDEF));
    107  vaccGHIJ = wasm_i32x4_add(vaccGHIJ, wasm_i32x4_widen_low_i16x8(vprod0xGHIJKLMN));
    108  vaccKLMN = wasm_i32x4_add(vaccKLMN, wasm_i32x4_widen_high_i16x8(vprod0xGHIJKLMN));
    122  vacc0123 = wasm_i32x4_add(vacc0123, wasm_i32x4_widen_low_i16x8(vprod1x01234567));
    123  vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_widen_high_i16x8(vprod1x01234567));
    124  vacc89AB = wasm_i32x4_add(vacc89AB, wasm_i32x4_widen_low_i16x8(vprod1x89ABCDEF));
    125  vaccCDEF = wasm_i32x4_add(vaccCDEF, wasm_i32x4_widen_high_i16x8(vprod1x89ABCDEF));
    [all …]

D | up16x9-minmax-wasmsimd-mul16.c | all matches in xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16():
    98  vacc0123 = wasm_i32x4_add(vacc0123, wasm_i32x4_widen_low_i16x8(vprod0x01234567));
    99  vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_widen_high_i16x8(vprod0x01234567));
    100  vacc89AB = wasm_i32x4_add(vacc89AB, wasm_i32x4_widen_low_i16x8(vprod0x89ABCDEF));
    101  vaccCDEF = wasm_i32x4_add(vaccCDEF, wasm_i32x4_widen_high_i16x8(vprod0x89ABCDEF));
    112  vacc0123 = wasm_i32x4_add(vacc0123, wasm_i32x4_widen_low_i16x8(vprod1x01234567));
    113  vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_widen_high_i16x8(vprod1x01234567));
    114  vacc89AB = wasm_i32x4_add(vacc89AB, wasm_i32x4_widen_low_i16x8(vprod1x89ABCDEF));
    115  vaccCDEF = wasm_i32x4_add(vaccCDEF, wasm_i32x4_widen_high_i16x8(vprod1x89ABCDEF));
    126  vacc0123 = wasm_i32x4_add(vacc0123, wasm_i32x4_widen_low_i16x8(vprod2x01234567));
    127  vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_widen_high_i16x8(vprod2x01234567));
    [all …]

D | up8x9-minmax-wasmsimd-mul16.c | all matches in xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16():
    93  vacc0123 = wasm_i32x4_add(vacc0123, wasm_i32x4_widen_low_i16x8(vprod0x01234567));
    94  vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_widen_high_i16x8(vprod0x01234567));
    102  vacc0123 = wasm_i32x4_add(vacc0123, wasm_i32x4_widen_low_i16x8(vprod1x01234567));
    103  vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_widen_high_i16x8(vprod1x01234567));
    111  vacc0123 = wasm_i32x4_add(vacc0123, wasm_i32x4_widen_low_i16x8(vprod2x01234567));
    112  vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_widen_high_i16x8(vprod2x01234567));
    120  vacc0123 = wasm_i32x4_add(vacc0123, wasm_i32x4_widen_low_i16x8(vprod3x01234567));
    121  vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_widen_high_i16x8(vprod3x01234567));
    129  vacc0123 = wasm_i32x4_add(vacc0123, wasm_i32x4_widen_low_i16x8(vprod4x01234567));
    130  vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_widen_high_i16x8(vprod4x01234567));
    [all …]

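Note: every mul16 microkernel above uses the same accumulation idiom: int8 inputs and weights are sign-extended to int16, multiplied in 16-bit lanes (the vprod* vectors; two int8 factors always fit in int16), and each product vector is then widened and added into two int32x4 accumulators. A minimal sketch of one such step follows; it assumes the older intrinsic names these files use (wasm_i32x4_widen_*_i16x8 was later renamed to wasm_i32x4_extend_*_i16x8 in wasm_simd128.h), and vi/vk are hypothetical input/kernel vectors already sign-extended to int16:

    #include <wasm_simd128.h>

    // One mul16 accumulation step: 8 int16 products widened into two int32x4 sums.
    static void mul16_accumulate(v128_t vi, v128_t vk, v128_t *vacc_lo, v128_t *vacc_hi) {
      const v128_t vprod = wasm_i16x8_mul(vi, vk);  // 16-bit lane-wise products, no overflow for int8 factors
      *vacc_lo = wasm_i32x4_add(*vacc_lo, wasm_i32x4_widen_low_i16x8(vprod));
      *vacc_hi = wasm_i32x4_add(*vacc_hi, wasm_i32x4_widen_high_i16x8(vprod));
    }
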
/external/XNNPACK/src/qs8-vadd/gen/
D | minmax-wasmsimd-x32.c | all matches in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32():
    46  …v128_t vacc0123 = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vx…
    47  …v128_t vacc4567 = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(v…
    48  …v128_t vacc89AB = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vx…
    49  …v128_t vaccCDEF = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(v…
    50  …v128_t vaccGHIJ = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vx…
    51  …v128_t vaccKLMN = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(v…
    52  …v128_t vaccOPQR = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vx…
    53  …v128_t vaccSTUV = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(v…
    55  …vacc0123 = wasm_i32x4_add(vacc0123, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vy01234567), vy_mult…
    56  …vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(vy01234567), vy_mul…
    [all …]

D | minmax-wasmsimd-x24.c | all matches in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24():
    44  …v128_t vacc0123 = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vx…
    45  …v128_t vacc4567 = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(v…
    46  …v128_t vacc89AB = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vx…
    47  …v128_t vaccCDEF = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(v…
    48  …v128_t vaccGHIJ = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vx…
    49  …v128_t vaccKLMN = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(v…
    51  …vacc0123 = wasm_i32x4_add(vacc0123, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vy01234567), vy_mult…
    52  …vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(vy01234567), vy_mul…
    53  …vacc89AB = wasm_i32x4_add(vacc89AB, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vy89ABCDEF), vy_mult…
    54  …vaccCDEF = wasm_i32x4_add(vaccCDEF, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(vy89ABCDEF), vy_mul…
    [all …]

D | minmax-wasmsimd-x16.c | all matches in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16():
    42  …v128_t vacc0123 = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vx…
    43  …v128_t vacc4567 = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(v…
    44  …v128_t vacc89AB = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vx…
    45  …v128_t vaccCDEF = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(v…
    47  …vacc0123 = wasm_i32x4_add(vacc0123, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vy01234567), vy_mult…
    48  …vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(vy01234567), vy_mul…
    49  …vacc89AB = wasm_i32x4_add(vacc89AB, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vy89ABCDEF), vy_mult…
    50  …vaccCDEF = wasm_i32x4_add(vaccCDEF, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(vy89ABCDEF), vy_mul…
    52  …const v128_t vrem0123 = wasm_i32x4_add(wasm_v128_and(vacc0123, vremainder_mask), wasm_i32x4_shr(va…
    53  …const v128_t vrem4567 = wasm_i32x4_add(wasm_v128_and(vacc4567, vremainder_mask), wasm_i32x4_shr(va…
    [all …]

D | minmax-wasmsimd-x8.c | all matches in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8():
    40  …v128_t vacc0123 = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vx…
    41  …v128_t vacc4567 = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(v…
    43  …vacc0123 = wasm_i32x4_add(vacc0123, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vy01234567), vy_mult…
    44  …vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(vy01234567), vy_mul…
    46  …const v128_t vrem0123 = wasm_i32x4_add(wasm_v128_and(vacc0123, vremainder_mask), wasm_i32x4_shr(va…
    47  …const v128_t vrem4567 = wasm_i32x4_add(wasm_v128_and(vacc4567, vremainder_mask), wasm_i32x4_shr(va…
    68  …v128_t vacc0123 = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vx…
    69  …v128_t vacc4567 = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(v…
    71  …vacc0123 = wasm_i32x4_add(vacc0123, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vy01234567), vy_mult…
    72  …vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(vy01234567), vy_mul…
    [all …]

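Note: each vadd kernel forms, per four-lane group, vacc = vzero_point_product + widen(x) * vx_multiplier + widen(y) * vy_multiplier before requantizing; the listing truncates the right-hand ends of those lines. The sketch below reconstructs the shape of one 8-lane step under that reading; it is an illustration under assumptions, not verbatim source:

    #include <wasm_simd128.h>

    // One 8-lane step of the QS8 addition: widen the int16-extended inputs to
    // int32, scale each by its per-input multiplier, and accumulate onto the
    // precomputed zero-point/bias term.
    static void vadd_step(v128_t vx, v128_t vy, v128_t vzero_point_product,
                          v128_t vx_multiplier, v128_t vy_multiplier,
                          v128_t *vacc_lo, v128_t *vacc_hi) {
      *vacc_lo = wasm_i32x4_add(vzero_point_product,
          wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vx), vx_multiplier));
      *vacc_hi = wasm_i32x4_add(vzero_point_product,
          wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(vx), vx_multiplier));
      *vacc_lo = wasm_i32x4_add(*vacc_lo,
          wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vy), vy_multiplier));
      *vacc_hi = wasm_i32x4_add(*vacc_hi,
          wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(vy), vy_multiplier));
    }
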
/external/XNNPACK/src/qs8-gemm/gen/
D | 3x4c8-xw-minmax-wasmsimd.c | all matches in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd():
    83  vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0));
    84  vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0));
    86  vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_low_i16x8(vprod1x0));
    87  vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_high_i16x8(vprod1x0));
    89  vacc2x0 = wasm_i32x4_add(vacc2x0, wasm_i32x4_widen_low_i16x8(vprod2x0));
    90  vacc2x0 = wasm_i32x4_add(vacc2x0, wasm_i32x4_widen_high_i16x8(vprod2x0));
    94  vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1));
    95  vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1));
    97  vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_low_i16x8(vprod1x1));
    98  vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_high_i16x8(vprod1x1));
    [all …]

D | 3x4c8-minmax-wasmsimd-ld128.c | all matches in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128():
    85  vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0));
    87  vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_low_i16x8(vprod1x0));
    89  vacc2x0 = wasm_i32x4_add(vacc2x0, wasm_i32x4_widen_low_i16x8(vprod2x0));
    92  vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1));
    93  vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0));
    95  vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_low_i16x8(vprod1x1));
    96  vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_high_i16x8(vprod1x0));
    98  vacc2x1 = wasm_i32x4_add(vacc2x1, wasm_i32x4_widen_low_i16x8(vprod2x1));
    99  vacc2x0 = wasm_i32x4_add(vacc2x0, wasm_i32x4_widen_high_i16x8(vprod2x0));
    101  vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1));
    [all …]

D | 3x4c8-minmax-wasmsimd-ld64.c | all matches in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64():
    83  vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0));
    84  vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0));
    86  vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_low_i16x8(vprod1x0));
    87  vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_high_i16x8(vprod1x0));
    89  vacc2x0 = wasm_i32x4_add(vacc2x0, wasm_i32x4_widen_low_i16x8(vprod2x0));
    90  vacc2x0 = wasm_i32x4_add(vacc2x0, wasm_i32x4_widen_high_i16x8(vprod2x0));
    94  vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1));
    95  vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1));
    97  vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_low_i16x8(vprod1x1));
    98  vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_high_i16x8(vprod1x1));
    [all …]

D | 2x4c8-xw-minmax-wasmsimd.c | all matches in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd():
    71  vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0));
    72  vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0));
    74  vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_low_i16x8(vprod1x0));
    75  vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_high_i16x8(vprod1x0));
    79  vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1));
    80  vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1));
    82  vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_low_i16x8(vprod1x1));
    83  vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_high_i16x8(vprod1x1));
    87  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2));
    88  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2));
    [all …]

D | 2x4c8-minmax-wasmsimd-ld128.c | all matches in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128():
    73  vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0));
    75  vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_low_i16x8(vprod1x0));
    78  vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1));
    79  vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0));
    81  vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_low_i16x8(vprod1x1));
    82  vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_high_i16x8(vprod1x0));
    84  vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1));
    85  vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_high_i16x8(vprod1x1));
    91  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2));
    93  vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_low_i16x8(vprod1x2));
    [all …]

D | 2x4c8-minmax-wasmsimd-ld64.c | all matches in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64():
    71  vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0));
    72  vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0));
    74  vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_low_i16x8(vprod1x0));
    75  vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_high_i16x8(vprod1x0));
    79  vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1));
    80  vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1));
    82  vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_low_i16x8(vprod1x1));
    83  vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_high_i16x8(vprod1x1));
    87  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2));
    88  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2));
    [all …]

D | 1x4c8-minmax-wasmsimd-ld64.c | all matches in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64():
    59  vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0));
    60  vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0));
    64  vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1));
    65  vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1));
    69  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2));
    70  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2));
    74  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_low_i16x8(vprod0x3));
    75  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_high_i16x8(vprod0x3));
    81  …const v128_t vacc0x02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x0, vacc0x2, 0, 4, 1, 5), wasm_v32x…
    82  …const v128_t vacc0x13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x1, vacc0x3, 0, 4, 1, 5), wasm_v32x…
    [all …]

D | 1x4c8-minmax-wasmsimd-ld128.c | all matches in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128():
    61  vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0));
    64  vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1));
    65  vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0));
    67  vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1));
    73  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2));
    76  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_low_i16x8(vprod0x3));
    77  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2));
    79  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_high_i16x8(vprod0x3));
    85  …const v128_t vacc0x02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x0, vacc0x2, 0, 4, 1, 5), wasm_v32x…
    86  …const v128_t vacc0x13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x1, vacc0x3, 0, 4, 1, 5), wasm_v32x…
    [all …]

D | 1x4c8-xw-minmax-wasmsimd.c | all matches in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd():
    59  vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0));
    60  vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0));
    64  vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1));
    65  vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1));
    69  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2));
    70  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2));
    74  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_low_i16x8(vprod0x3));
    75  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_high_i16x8(vprod0x3));
    81  …const v128_t vacc0x02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x0, vacc0x2, 0, 4, 1, 5), wasm_v32x…
    82  …const v128_t vacc0x13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x1, vacc0x3, 0, 4, 1, 5), wasm_v32x…
    [all …]

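Note: in the 4c8 GEMM kernels each output column accumulates into its own int32x4 register (vacc0x0 through vacc0x3), so the epilogue must reduce four partial-sum vectors into one vector of column totals. The vacc0x02/vacc0x13 lines above show the first half of that reduction; the second shuffle operand is cut off by the listing, so the (2, 6, 3, 7) lane ordering below is an assumption inferred from the visible (0, 4, 1, 5) pattern:

    #include <wasm_simd128.h>

    // Reduce four int32x4 partial sums (one per output column) to a single
    // int32x4 holding the four column totals, via pairwise shuffle-and-add.
    static v128_t reduce4(v128_t vacc0, v128_t vacc1, v128_t vacc2, v128_t vacc3) {
      const v128_t vacc02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0, vacc2, 0, 4, 1, 5),
                                           wasm_v32x4_shuffle(vacc0, vacc2, 2, 6, 3, 7));
      const v128_t vacc13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1, vacc3, 0, 4, 1, 5),
                                           wasm_v32x4_shuffle(vacc1, vacc3, 2, 6, 3, 7));
      // Lanes now line up as (col0, col1, col2, col3) after one more round.
      return wasm_i32x4_add(wasm_v32x4_shuffle(vacc02, vacc13, 0, 4, 1, 5),
                            wasm_v32x4_shuffle(vacc02, vacc13, 2, 6, 3, 7));
    }
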
/external/XNNPACK/src/qs8-igemm/gen/
D | 3x4c8-minmax-wasmsimd-ld128.c | all matches in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128():
    100  vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0));
    102  vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_low_i16x8(vprod1x0));
    104  vacc2x0 = wasm_i32x4_add(vacc2x0, wasm_i32x4_widen_low_i16x8(vprod2x0));
    107  vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1));
    108  vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0));
    110  vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_low_i16x8(vprod1x1));
    111  vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_high_i16x8(vprod1x0));
    113  vacc2x1 = wasm_i32x4_add(vacc2x1, wasm_i32x4_widen_low_i16x8(vprod2x1));
    114  vacc2x0 = wasm_i32x4_add(vacc2x0, wasm_i32x4_widen_high_i16x8(vprod2x0));
    116  vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1));
    [all …]

D | 3x4c8-minmax-wasmsimd-ld64.c | all matches in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64():
    98  vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0));
    99  vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0));
    101  vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_low_i16x8(vprod1x0));
    102  vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_high_i16x8(vprod1x0));
    104  vacc2x0 = wasm_i32x4_add(vacc2x0, wasm_i32x4_widen_low_i16x8(vprod2x0));
    105  vacc2x0 = wasm_i32x4_add(vacc2x0, wasm_i32x4_widen_high_i16x8(vprod2x0));
    109  vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1));
    110  vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1));
    112  vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_low_i16x8(vprod1x1));
    113  vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_high_i16x8(vprod1x1));
    [all …]

D | 2x4c8-minmax-wasmsimd-ld128.c | all matches in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128():
    86  vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0));
    88  vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_low_i16x8(vprod1x0));
    91  vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1));
    92  vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0));
    94  vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_low_i16x8(vprod1x1));
    95  vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_high_i16x8(vprod1x0));
    97  vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1));
    98  vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_high_i16x8(vprod1x1));
    104  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2));
    106  vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_low_i16x8(vprod1x2));
    [all …]

D | 2x4c8-minmax-wasmsimd-ld64.c | all matches in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64():
    84  vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0));
    85  vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0));
    87  vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_low_i16x8(vprod1x0));
    88  vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_high_i16x8(vprod1x0));
    92  vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1));
    93  vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1));
    95  vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_low_i16x8(vprod1x1));
    96  vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_high_i16x8(vprod1x1));
    100  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2));
    101  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2));
    [all …]

D | 1x4c8-minmax-wasmsimd-ld64.c | all matches in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64():
    70  vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0));
    71  vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0));
    75  vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1));
    76  vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1));
    80  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2));
    81  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2));
    85  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_low_i16x8(vprod0x3));
    86  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_high_i16x8(vprod0x3));
    94  …const v128_t vacc0x02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x0, vacc0x2, 0, 4, 1, 5), wasm_v32x…
    95  …const v128_t vacc0x13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x1, vacc0x3, 0, 4, 1, 5), wasm_v32x…
    [all …]

D | 1x4c8-minmax-wasmsimd-ld128.c | all matches in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128():
    72  vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0));
    75  vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1));
    76  vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0));
    78  vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1));
    84  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2));
    87  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_low_i16x8(vprod0x3));
    88  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2));
    90  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_high_i16x8(vprod0x3));
    98  …const v128_t vacc0x02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x0, vacc0x2, 0, 4, 1, 5), wasm_v32x…
    99  …const v128_t vacc0x13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x1, vacc0x3, 0, 4, 1, 5), wasm_v32x…
    [all …]

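Note: the ld64 and ld128 variants above differ only in load width and instruction scheduling. ld64 widens the low and high halves of one product back-to-back, while ld128 interleaves two adjacent output columns, issuing the widen_low of column 1 between the widen_low and widen_high of column 0 (visible in the line ordering of the ld128 entries). A sketch of the two orderings, with vprod0/vprod1 as hypothetical 16-bit product vectors:

    #include <wasm_simd128.h>

    // ld64-style scheduling: finish one column before starting the next.
    static void step_ld64(v128_t vprod0, v128_t *vacc0) {
      *vacc0 = wasm_i32x4_add(*vacc0, wasm_i32x4_widen_low_i16x8(vprod0));
      *vacc0 = wasm_i32x4_add(*vacc0, wasm_i32x4_widen_high_i16x8(vprod0));
    }

    // ld128-style scheduling: interleave two columns so the widening adds of
    // one column can overlap the latency of the other.
    static void step_ld128(v128_t vprod0, v128_t vprod1, v128_t *vacc0, v128_t *vacc1) {
      *vacc0 = wasm_i32x4_add(*vacc0, wasm_i32x4_widen_low_i16x8(vprod0));
      *vacc1 = wasm_i32x4_add(*vacc1, wasm_i32x4_widen_low_i16x8(vprod1));
      *vacc0 = wasm_i32x4_add(*vacc0, wasm_i32x4_widen_high_i16x8(vprod0));
      *vacc1 = wasm_i32x4_add(*vacc1, wasm_i32x4_widen_high_i16x8(vprod1));
    }
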
/external/XNNPACK/src/qs8-vaddc/gen/
D | minmax-wasmsimd-x32.c | all matches in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32():
    33  …vzero_point_product = wasm_i32x4_add(vzero_point_product, wasm_v128_load(params->wasmsimd.zero_poi…
    42  …v128_t vacc0123 = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vx…
    43  …v128_t vacc4567 = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(v…
    44  …v128_t vacc89AB = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vx…
    45  …v128_t vaccCDEF = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(v…
    46  …v128_t vaccGHIJ = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vx…
    47  …v128_t vaccKLMN = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(v…
    48  …v128_t vaccOPQR = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vx…
    49  …v128_t vaccSTUV = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(v…
    51  …const v128_t vrem0123 = wasm_i32x4_add(wasm_v128_and(vacc0123, vremainder_mask), wasm_i32x4_shr(va…
    [all …]

D | minmax-wasmsimd-x24.c | all matches in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24():
    33  …vzero_point_product = wasm_i32x4_add(vzero_point_product, wasm_v128_load(params->wasmsimd.zero_poi…
    41  …v128_t vacc0123 = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vx…
    42  …v128_t vacc4567 = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(v…
    43  …v128_t vacc89AB = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vx…
    44  …v128_t vaccCDEF = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(v…
    45  …v128_t vaccGHIJ = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vx…
    46  …v128_t vaccKLMN = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(v…
    48  …const v128_t vrem0123 = wasm_i32x4_add(wasm_v128_and(vacc0123, vremainder_mask), wasm_i32x4_shr(va…
    49  …const v128_t vrem4567 = wasm_i32x4_add(wasm_v128_and(vacc4567, vremainder_mask), wasm_i32x4_shr(va…
    50  …const v128_t vrem89AB = wasm_i32x4_add(wasm_v128_and(vacc89AB, vremainder_mask), wasm_i32x4_shr(va…
    [all …]

D | minmax-wasmsimd-x16.c | all matches in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16():
    33  …vzero_point_product = wasm_i32x4_add(vzero_point_product, wasm_v128_load(params->wasmsimd.zero_poi…
    40  …v128_t vacc0123 = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vx…
    41  …v128_t vacc4567 = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(v…
    42  …v128_t vacc89AB = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vx…
    43  …v128_t vaccCDEF = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(v…
    45  …const v128_t vrem0123 = wasm_i32x4_add(wasm_v128_and(vacc0123, vremainder_mask), wasm_i32x4_shr(va…
    46  …const v128_t vrem4567 = wasm_i32x4_add(wasm_v128_and(vacc4567, vremainder_mask), wasm_i32x4_shr(va…
    47  …const v128_t vrem89AB = wasm_i32x4_add(wasm_v128_and(vacc89AB, vremainder_mask), wasm_i32x4_shr(va…
    48  …const v128_t vremCDEF = wasm_i32x4_add(wasm_v128_and(vaccCDEF, vremainder_mask), wasm_i32x4_shr(va…
    72  …v128_t vacc0123 = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vx…
    [all …]

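Note: the vaddc (add-with-constant) kernels fold the constant operand into vzero_point_product once, up front (the line 33 matches above), so the per-element loop only scales the x input. The vrem lines implement the rounding step of the fixed-point requantization: masking off the low bits kept by the later arithmetic right shift and adding -1 for negative accumulators so the shift rounds to nearest instead of toward negative infinity. The listing truncates the shift amount; the 31 below is an assumption consistent with XNNPACK's usual sign-extraction pattern, not confirmed by the visible text:

    #include <wasm_simd128.h>

    // Remainder term for a rounding arithmetic right shift of vacc:
    // (vacc & remainder_mask) + (vacc >> 31). The sign-propagating shift
    // contributes -1 per negative lane; comparing the result against a
    // threshold after the main shift yields round-to-nearest behavior.
    static v128_t rounding_remainder(v128_t vacc, v128_t vremainder_mask) {
      return wasm_i32x4_add(wasm_v128_and(vacc, vremainder_mask),
                            wasm_i32x4_shr(vacc, 31));
    }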