Home
last modified time | relevance | path

Searched refs:wasm_i32x4_add (Results 1 – 25 of 67) sorted by relevance

123

/external/XNNPACK/src/qs8-dwconv/gen/
up24x9-minmax-wasmsimd-mul16.c:103 vacc0123 = wasm_i32x4_add(vacc0123, wasm_i32x4_widen_low_i16x8(vprod0x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16()
104 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_widen_high_i16x8(vprod0x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16()
105 vacc89AB = wasm_i32x4_add(vacc89AB, wasm_i32x4_widen_low_i16x8(vprod0x89ABCDEF)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16()
106 vaccCDEF = wasm_i32x4_add(vaccCDEF, wasm_i32x4_widen_high_i16x8(vprod0x89ABCDEF)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16()
107 vaccGHIJ = wasm_i32x4_add(vaccGHIJ, wasm_i32x4_widen_low_i16x8(vprod0xGHIJKLMN)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16()
108 vaccKLMN = wasm_i32x4_add(vaccKLMN, wasm_i32x4_widen_high_i16x8(vprod0xGHIJKLMN)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16()
122 vacc0123 = wasm_i32x4_add(vacc0123, wasm_i32x4_widen_low_i16x8(vprod1x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16()
123 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_widen_high_i16x8(vprod1x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16()
124 vacc89AB = wasm_i32x4_add(vacc89AB, wasm_i32x4_widen_low_i16x8(vprod1x89ABCDEF)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16()
125 vaccCDEF = wasm_i32x4_add(vaccCDEF, wasm_i32x4_widen_high_i16x8(vprod1x89ABCDEF)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16()
[all …]
up16x9-minmax-wasmsimd-mul16.c:98 vacc0123 = wasm_i32x4_add(vacc0123, wasm_i32x4_widen_low_i16x8(vprod0x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16()
99 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_widen_high_i16x8(vprod0x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16()
100 vacc89AB = wasm_i32x4_add(vacc89AB, wasm_i32x4_widen_low_i16x8(vprod0x89ABCDEF)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16()
101 vaccCDEF = wasm_i32x4_add(vaccCDEF, wasm_i32x4_widen_high_i16x8(vprod0x89ABCDEF)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16()
112 vacc0123 = wasm_i32x4_add(vacc0123, wasm_i32x4_widen_low_i16x8(vprod1x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16()
113 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_widen_high_i16x8(vprod1x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16()
114 vacc89AB = wasm_i32x4_add(vacc89AB, wasm_i32x4_widen_low_i16x8(vprod1x89ABCDEF)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16()
115 vaccCDEF = wasm_i32x4_add(vaccCDEF, wasm_i32x4_widen_high_i16x8(vprod1x89ABCDEF)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16()
126 vacc0123 = wasm_i32x4_add(vacc0123, wasm_i32x4_widen_low_i16x8(vprod2x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16()
127 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_widen_high_i16x8(vprod2x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16()
[all …]
up8x9-minmax-wasmsimd-mul16.c:93 vacc0123 = wasm_i32x4_add(vacc0123, wasm_i32x4_widen_low_i16x8(vprod0x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16()
94 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_widen_high_i16x8(vprod0x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16()
102 vacc0123 = wasm_i32x4_add(vacc0123, wasm_i32x4_widen_low_i16x8(vprod1x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16()
103 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_widen_high_i16x8(vprod1x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16()
111 vacc0123 = wasm_i32x4_add(vacc0123, wasm_i32x4_widen_low_i16x8(vprod2x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16()
112 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_widen_high_i16x8(vprod2x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16()
120 vacc0123 = wasm_i32x4_add(vacc0123, wasm_i32x4_widen_low_i16x8(vprod3x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16()
121 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_widen_high_i16x8(vprod3x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16()
129 vacc0123 = wasm_i32x4_add(vacc0123, wasm_i32x4_widen_low_i16x8(vprod4x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16()
130 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_widen_high_i16x8(vprod4x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16()
[all …]
/external/XNNPACK/src/qs8-vadd/gen/
minmax-wasmsimd-x32.c:46 …v128_t vacc0123 = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vx… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32()
47 …v128_t vacc4567 = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(v… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32()
48 …v128_t vacc89AB = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vx… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32()
49 …v128_t vaccCDEF = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(v… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32()
50 …v128_t vaccGHIJ = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vx… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32()
51 …v128_t vaccKLMN = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(v… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32()
52 …v128_t vaccOPQR = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vx… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32()
53 …v128_t vaccSTUV = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(v… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32()
55 …vacc0123 = wasm_i32x4_add(vacc0123, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vy01234567), vy_mult… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32()
56 …vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(vy01234567), vy_mul… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x32()
[all …]
minmax-wasmsimd-x24.c:44 …v128_t vacc0123 = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vx… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24()
45 …v128_t vacc4567 = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(v… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24()
46 …v128_t vacc89AB = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vx… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24()
47 …v128_t vaccCDEF = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(v… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24()
48 …v128_t vaccGHIJ = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vx… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24()
49 …v128_t vaccKLMN = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(v… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24()
51 …vacc0123 = wasm_i32x4_add(vacc0123, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vy01234567), vy_mult… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24()
52 …vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(vy01234567), vy_mul… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24()
53 …vacc89AB = wasm_i32x4_add(vacc89AB, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vy89ABCDEF), vy_mult… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24()
54 …vaccCDEF = wasm_i32x4_add(vaccCDEF, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(vy89ABCDEF), vy_mul… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x24()
[all …]
minmax-wasmsimd-x16.c:42 …v128_t vacc0123 = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vx… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16()
43 …v128_t vacc4567 = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(v… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16()
44 …v128_t vacc89AB = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vx… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16()
45 …v128_t vaccCDEF = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(v… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16()
47 …vacc0123 = wasm_i32x4_add(vacc0123, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vy01234567), vy_mult… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16()
48 …vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(vy01234567), vy_mul… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16()
49 …vacc89AB = wasm_i32x4_add(vacc89AB, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vy89ABCDEF), vy_mult… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16()
50 …vaccCDEF = wasm_i32x4_add(vaccCDEF, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(vy89ABCDEF), vy_mul… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16()
52 …const v128_t vrem0123 = wasm_i32x4_add(wasm_v128_and(vacc0123, vremainder_mask), wasm_i32x4_shr(va… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16()
53 …const v128_t vrem4567 = wasm_i32x4_add(wasm_v128_and(vacc4567, vremainder_mask), wasm_i32x4_shr(va… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16()
[all …]
minmax-wasmsimd-x8.c:40 …v128_t vacc0123 = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vx… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8()
41 …v128_t vacc4567 = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(v… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8()
43 …vacc0123 = wasm_i32x4_add(vacc0123, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vy01234567), vy_mult… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8()
44 …vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(vy01234567), vy_mul… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8()
46 …const v128_t vrem0123 = wasm_i32x4_add(wasm_v128_and(vacc0123, vremainder_mask), wasm_i32x4_shr(va… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8()
47 …const v128_t vrem4567 = wasm_i32x4_add(wasm_v128_and(vacc4567, vremainder_mask), wasm_i32x4_shr(va… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8()
68 …v128_t vacc0123 = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vx… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8()
69 …v128_t vacc4567 = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(v… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8()
71 …vacc0123 = wasm_i32x4_add(vacc0123, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vy01234567), vy_mult… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8()
72 …vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(vy01234567), vy_mul… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8()
[all …]
/external/XNNPACK/src/qs8-gemm/gen/
3x4c8-xw-minmax-wasmsimd.c:83 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0)); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()
84 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0)); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()
86 vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_low_i16x8(vprod1x0)); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()
87 vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_high_i16x8(vprod1x0)); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()
89 vacc2x0 = wasm_i32x4_add(vacc2x0, wasm_i32x4_widen_low_i16x8(vprod2x0)); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()
90 vacc2x0 = wasm_i32x4_add(vacc2x0, wasm_i32x4_widen_high_i16x8(vprod2x0)); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()
94 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1)); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()
95 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1)); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()
97 vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_low_i16x8(vprod1x1)); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()
98 vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_high_i16x8(vprod1x1)); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()
[all …]
3x4c8-minmax-wasmsimd-ld128.c:85 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0)); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
87 vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_low_i16x8(vprod1x0)); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
89 vacc2x0 = wasm_i32x4_add(vacc2x0, wasm_i32x4_widen_low_i16x8(vprod2x0)); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
92 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1)); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
93 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0)); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
95 vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_low_i16x8(vprod1x1)); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
96 vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_high_i16x8(vprod1x0)); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
98 vacc2x1 = wasm_i32x4_add(vacc2x1, wasm_i32x4_widen_low_i16x8(vprod2x1)); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
99 vacc2x0 = wasm_i32x4_add(vacc2x0, wasm_i32x4_widen_high_i16x8(vprod2x0)); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
101 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1)); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
[all …]
3x4c8-minmax-wasmsimd-ld64.c:83 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0)); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
84 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0)); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
86 vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_low_i16x8(vprod1x0)); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
87 vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_high_i16x8(vprod1x0)); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
89 vacc2x0 = wasm_i32x4_add(vacc2x0, wasm_i32x4_widen_low_i16x8(vprod2x0)); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
90 vacc2x0 = wasm_i32x4_add(vacc2x0, wasm_i32x4_widen_high_i16x8(vprod2x0)); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
94 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1)); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
95 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1)); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
97 vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_low_i16x8(vprod1x1)); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
98 vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_high_i16x8(vprod1x1)); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
[all …]
2x4c8-xw-minmax-wasmsimd.c:71 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0)); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd()
72 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0)); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd()
74 vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_low_i16x8(vprod1x0)); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd()
75 vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_high_i16x8(vprod1x0)); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd()
79 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1)); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd()
80 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1)); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd()
82 vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_low_i16x8(vprod1x1)); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd()
83 vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_high_i16x8(vprod1x1)); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd()
87 vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2)); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd()
88 vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2)); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd()
[all …]
2x4c8-minmax-wasmsimd-ld128.c:73 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
75 vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_low_i16x8(vprod1x0)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
78 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
79 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
81 vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_low_i16x8(vprod1x1)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
82 vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_high_i16x8(vprod1x0)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
84 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
85 vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_high_i16x8(vprod1x1)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
91 vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
93 vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_low_i16x8(vprod1x2)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
[all …]
2x4c8-minmax-wasmsimd-ld64.c:71 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
72 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
74 vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_low_i16x8(vprod1x0)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
75 vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_high_i16x8(vprod1x0)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
79 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
80 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
82 vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_low_i16x8(vprod1x1)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
83 vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_high_i16x8(vprod1x1)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
87 vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
88 vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
[all …]
1x4c8-minmax-wasmsimd-ld64.c:59 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0)); in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64()
60 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0)); in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64()
64 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1)); in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64()
65 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1)); in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64()
69 vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2)); in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64()
70 vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2)); in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64()
74 vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_low_i16x8(vprod0x3)); in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64()
75 vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_high_i16x8(vprod0x3)); in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64()
81 …const v128_t vacc0x02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x0, vacc0x2, 0, 4, 1, 5), wasm_v32x… in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64()
82 …const v128_t vacc0x13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x1, vacc0x3, 0, 4, 1, 5), wasm_v32x… in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64()
[all …]
1x4c8-minmax-wasmsimd-ld128.c:61 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0)); in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
64 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1)); in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
65 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0)); in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
67 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1)); in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
73 vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2)); in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
76 vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_low_i16x8(vprod0x3)); in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
77 vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2)); in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
79 vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_high_i16x8(vprod0x3)); in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
85 …const v128_t vacc0x02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x0, vacc0x2, 0, 4, 1, 5), wasm_v32x… in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
86 …const v128_t vacc0x13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x1, vacc0x3, 0, 4, 1, 5), wasm_v32x… in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
[all …]
1x4c8-xw-minmax-wasmsimd.c:59 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0)); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd()
60 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0)); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd()
64 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1)); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd()
65 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1)); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd()
69 vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2)); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd()
70 vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2)); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd()
74 vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_low_i16x8(vprod0x3)); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd()
75 vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_high_i16x8(vprod0x3)); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd()
81 …const v128_t vacc0x02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x0, vacc0x2, 0, 4, 1, 5), wasm_v32x… in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd()
82 …const v128_t vacc0x13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x1, vacc0x3, 0, 4, 1, 5), wasm_v32x… in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd()
[all …]
/external/XNNPACK/src/qs8-igemm/gen/
3x4c8-minmax-wasmsimd-ld128.c:100 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
102 vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_low_i16x8(vprod1x0)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
104 vacc2x0 = wasm_i32x4_add(vacc2x0, wasm_i32x4_widen_low_i16x8(vprod2x0)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
107 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
108 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
110 vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_low_i16x8(vprod1x1)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
111 vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_high_i16x8(vprod1x0)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
113 vacc2x1 = wasm_i32x4_add(vacc2x1, wasm_i32x4_widen_low_i16x8(vprod2x1)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
114 vacc2x0 = wasm_i32x4_add(vacc2x0, wasm_i32x4_widen_high_i16x8(vprod2x0)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
116 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
[all …]
3x4c8-minmax-wasmsimd-ld64.c:98 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
99 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
101 vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_low_i16x8(vprod1x0)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
102 vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_high_i16x8(vprod1x0)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
104 vacc2x0 = wasm_i32x4_add(vacc2x0, wasm_i32x4_widen_low_i16x8(vprod2x0)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
105 vacc2x0 = wasm_i32x4_add(vacc2x0, wasm_i32x4_widen_high_i16x8(vprod2x0)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
109 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
110 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
112 vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_low_i16x8(vprod1x1)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
113 vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_high_i16x8(vprod1x1)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
[all …]
2x4c8-minmax-wasmsimd-ld128.c:86 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
88 vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_low_i16x8(vprod1x0)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
91 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
92 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
94 vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_low_i16x8(vprod1x1)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
95 vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_high_i16x8(vprod1x0)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
97 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
98 vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_high_i16x8(vprod1x1)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
104 vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
106 vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_low_i16x8(vprod1x2)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
[all …]
2x4c8-minmax-wasmsimd-ld64.c:84 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
85 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
87 vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_low_i16x8(vprod1x0)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
88 vacc1x0 = wasm_i32x4_add(vacc1x0, wasm_i32x4_widen_high_i16x8(vprod1x0)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
92 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
93 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
95 vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_low_i16x8(vprod1x1)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
96 vacc1x1 = wasm_i32x4_add(vacc1x1, wasm_i32x4_widen_high_i16x8(vprod1x1)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
100 vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
101 vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
[all …]
1x4c8-minmax-wasmsimd-ld64.c:70 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0)); in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64()
71 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0)); in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64()
75 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1)); in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64()
76 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1)); in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64()
80 vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2)); in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64()
81 vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2)); in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64()
85 vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_low_i16x8(vprod0x3)); in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64()
86 vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_high_i16x8(vprod0x3)); in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64()
94 …const v128_t vacc0x02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x0, vacc0x2, 0, 4, 1, 5), wasm_v32x… in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64()
95 …const v128_t vacc0x13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x1, vacc0x3, 0, 4, 1, 5), wasm_v32x… in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64()
[all …]
1x4c8-minmax-wasmsimd-ld128.c:72 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0)); in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
75 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1)); in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
76 vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0)); in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
78 vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1)); in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
84 vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2)); in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
87 vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_low_i16x8(vprod0x3)); in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
88 vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2)); in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
90 vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_high_i16x8(vprod0x3)); in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
98 …const v128_t vacc0x02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x0, vacc0x2, 0, 4, 1, 5), wasm_v32x… in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
99 …const v128_t vacc0x13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x1, vacc0x3, 0, 4, 1, 5), wasm_v32x… in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
[all …]
/external/XNNPACK/src/qs8-vaddc/gen/
minmax-wasmsimd-x32.c:33 …vzero_point_product = wasm_i32x4_add(vzero_point_product, wasm_v128_load(params->wasmsimd.zero_poi… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32()
42 …v128_t vacc0123 = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vx… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32()
43 …v128_t vacc4567 = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(v… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32()
44 …v128_t vacc89AB = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vx… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32()
45 …v128_t vaccCDEF = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(v… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32()
46 …v128_t vaccGHIJ = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vx… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32()
47 …v128_t vaccKLMN = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(v… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32()
48 …v128_t vaccOPQR = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vx… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32()
49 …v128_t vaccSTUV = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(v… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32()
51 …const v128_t vrem0123 = wasm_i32x4_add(wasm_v128_and(vacc0123, vremainder_mask), wasm_i32x4_shr(va… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32()
[all …]
Dminmax-wasmsimd-x24.c33 …vzero_point_product = wasm_i32x4_add(vzero_point_product, wasm_v128_load(params->wasmsimd.zero_poi… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24()
41 …v128_t vacc0123 = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vx… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24()
42 …v128_t vacc4567 = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(v… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24()
43 …v128_t vacc89AB = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vx… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24()
44 …v128_t vaccCDEF = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(v… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24()
45 …v128_t vaccGHIJ = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vx… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24()
46 …v128_t vaccKLMN = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(v… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24()
48 …const v128_t vrem0123 = wasm_i32x4_add(wasm_v128_and(vacc0123, vremainder_mask), wasm_i32x4_shr(va… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24()
49 …const v128_t vrem4567 = wasm_i32x4_add(wasm_v128_and(vacc4567, vremainder_mask), wasm_i32x4_shr(va… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24()
50 …const v128_t vrem89AB = wasm_i32x4_add(wasm_v128_and(vacc89AB, vremainder_mask), wasm_i32x4_shr(va… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24()
[all …]
Dminmax-wasmsimd-x16.c33 …vzero_point_product = wasm_i32x4_add(vzero_point_product, wasm_v128_load(params->wasmsimd.zero_poi… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16()
40 …v128_t vacc0123 = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vx… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16()
41 …v128_t vacc4567 = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(v… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16()
42 …v128_t vacc89AB = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vx… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16()
43 …v128_t vaccCDEF = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(v… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16()
45 …const v128_t vrem0123 = wasm_i32x4_add(wasm_v128_and(vacc0123, vremainder_mask), wasm_i32x4_shr(va… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16()
46 …const v128_t vrem4567 = wasm_i32x4_add(wasm_v128_and(vacc4567, vremainder_mask), wasm_i32x4_shr(va… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16()
47 …const v128_t vrem89AB = wasm_i32x4_add(wasm_v128_and(vacc89AB, vremainder_mask), wasm_i32x4_shr(va… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16()
48 …const v128_t vremCDEF = wasm_i32x4_add(wasm_v128_and(vaccCDEF, vremainder_mask), wasm_i32x4_shr(va… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16()
72 …v128_t vacc0123 = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8(vx… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16()
[all …]

123