/external/XNNPACK/src/f32-ppmm/gen/
D | 3x3-minmax-scalar.c (hits in xnn_f32_ppmm_minmax_ukernel_3x3__scalar()):
     44  float vacc0x0 = w[0];   (local)
     47  float vacc1x0 = vacc0x0;
     50  float vacc2x0 = vacc0x0;
     67  vacc0x0 += va0 * vb0;
     81  vacc0x0 = math_min_f32(vacc0x0, vmax);
     92  vacc0x0 = math_max_f32(vacc0x0, vmin);
    109  c0[0] = vacc0x0;
    126  c0[0] = vacc0x0;
    131  vacc0x0 = vacc0x2;
    140  *c0 = vacc0x0;

D | 2x4-minmax-scalar.c (hits in xnn_f32_ppmm_minmax_ukernel_2x4__scalar()):
     40  float vacc0x0 = w[0];   (local)
     44  float vacc1x0 = vacc0x0;
     62  vacc0x0 += va0 * vb0;
     75  vacc0x0 = math_min_f32(vacc0x0, vmax);
     85  vacc0x0 = math_max_f32(vacc0x0, vmin);
     99  c0[0] = vacc0x0;
    114  c0[0] = vacc0x0;
    118  vacc0x0 = vacc0x2;
    125  *c0 = vacc0x0;

D | 4x2-minmax-scalar.c (hits in xnn_f32_ppmm_minmax_ukernel_4x2__scalar()):
     48  float vacc0x0 = w[0];   (local)
     50  float vacc1x0 = vacc0x0;
     52  float vacc2x0 = vacc0x0;
     54  float vacc3x0 = vacc0x0;
     70  vacc0x0 += va0 * vb0;
     83  vacc0x0 = math_min_f32(vacc0x0, vmax);
     93  vacc0x0 = math_max_f32(vacc0x0, vmin);
    109  c0[0] = vacc0x0;
    125  *c0 = vacc0x0;

D | 4x4-minmax-scalar.c (hits in xnn_f32_ppmm_minmax_ukernel_4x4__scalar()):
     48  float vacc0x0 = w[0];   (local)
     52  float vacc1x0 = vacc0x0;
     56  float vacc2x0 = vacc0x0;
     60  float vacc3x0 = vacc0x0;
     80  vacc0x0 += va0 * vb0;
    101  vacc0x0 = math_min_f32(vacc0x0, vmax);
    119  vacc0x0 = math_max_f32(vacc0x0, vmin);
    149  c0[0] = vacc0x0;
    170  c0[0] = vacc0x0;
    176  vacc0x0 = vacc0x2;
    [all …]
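Every f32-ppmm hit above follows the same shape: the first row accumulator is seeded from the packed bias w[0], the remaining rows copy it, the inner loop is a scalar multiply-accumulate over pre-packed panels, and the result is clamped against vmax and then vmin before the store. Below is a minimal sketch of that pattern, assuming a 2x1 tile; the names ppmm_2x1_sketch, min_f32, and max_f32 are illustrative stand-ins, not XNNPACK's API.

    #include <stddef.h>

    static inline float min_f32(float a, float b) { return a < b ? a : b; }
    static inline float max_f32(float a, float b) { return a > b ? a : b; }

    /* 2x1 tile: both row accumulators start from the single packed bias,
     * the loop multiply-accumulates panel-packed A against B, and the
     * clamp applies vmax first, then vmin, matching the hit order. */
    static void ppmm_2x1_sketch(size_t k, const float* w, const float* a,
                                float* c0, float* c1, float vmin, float vmax) {
      float vacc0x0 = w[0];      /* bias, as at line 44 of the first entry */
      float vacc1x0 = vacc0x0;   /* second row copies the same bias */
      const float* b = w + 1;    /* packed weights follow the bias */
      for (size_t i = 0; i < k; i++) {
        const float va0 = a[2 * i + 0];  /* pre-packed A: rows interleaved per k */
        const float va1 = a[2 * i + 1];
        const float vb0 = b[i];
        vacc0x0 += va0 * vb0;
        vacc1x0 += va1 * vb0;
      }
      vacc0x0 = min_f32(vacc0x0, vmax);
      vacc1x0 = min_f32(vacc1x0, vmax);
      vacc0x0 = max_f32(vacc0x0, vmin);
      vacc1x0 = max_f32(vacc1x0, vmin);
      *c0 = vacc0x0;
      *c1 = vacc1x0;
    }

Seeding every row from one bias read keeps the packed-weight layout to a single bias value per output column, which is why each kernel above has a chain of vaccNx0 = vacc0x0 copies.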
/external/XNNPACK/src/f32-spmm/gen/ |
D | 8x1-minmax-scalar.c (hits in xnn_f32_spmm_minmax_ukernel_8x1__scalar()):
     41  float vacc0x0 = *w++;   (local)
     42  float vacc1x0 = vacc0x0;
     43  float vacc2x0 = vacc0x0;
     44  float vacc3x0 = vacc0x0;
     45  float vacc4x0 = vacc0x0;
     46  float vacc5x0 = vacc0x0;
     47  float vacc6x0 = vacc0x0;
     48  float vacc7x0 = vacc0x0;
     62  vacc0x0 += vi0 * vw0;
     72  float vout0x0 = math_min_f32(vacc0x0, vmax);
    [all …]

D | 4x1-minmax-scalar.c (hits in xnn_f32_spmm_minmax_ukernel_4x1__scalar()):
     41  float vacc0x0 = *w++;   (local)
     42  float vacc1x0 = vacc0x0;
     43  float vacc2x0 = vacc0x0;
     44  float vacc3x0 = vacc0x0;
     54  vacc0x0 += vi0 * vw0;
     60  float vout0x0 = math_min_f32(vacc0x0, vmax);
    130  float vacc0x0 = *w++;   (local)
    131  float vacc1x0 = vacc0x0;
    139  vacc0x0 += vi0 * vw0;
    143  float vout0x0 = math_min_f32(vacc0x0, vmax);
    [all …]

D | 8x2-minmax-scalar.c (hits in xnn_f32_spmm_minmax_ukernel_8x2__scalar()):
     41  float vacc0x0 = *w++;   (local)
     42  float vacc1x0 = vacc0x0;
     43  float vacc2x0 = vacc0x0;
     44  float vacc3x0 = vacc0x0;
     45  float vacc4x0 = vacc0x0;
     46  float vacc5x0 = vacc0x0;
     47  float vacc6x0 = vacc0x0;
     48  float vacc7x0 = vacc0x0;
     71  vacc0x0 += vi0 * vw0;
     89  float vout0x0 = math_min_f32(vacc0x0, vmax);
    [all …]

D | 2x1-minmax-scalar.c (hits in xnn_f32_spmm_minmax_ukernel_2x1__scalar()):
     41  float vacc0x0 = *w++;   (local)
     42  float vacc1x0 = vacc0x0;
     50  vacc0x0 += vi0 * vw0;
     54  float vout0x0 = math_min_f32(vacc0x0, vmax);
    104  float vacc0x0 = *w++;   (local)
    111  vacc0x0 += vi0 * vw0;
    114  float vout0x0 = math_min_f32(vacc0x0, vmax);

D | 8x4-minmax-scalar.c (hits in xnn_f32_spmm_minmax_ukernel_8x4__scalar()):
     41  float vacc0x0 = *w++;   (local)
     42  float vacc1x0 = vacc0x0;
     43  float vacc2x0 = vacc0x0;
     44  float vacc3x0 = vacc0x0;
     45  float vacc4x0 = vacc0x0;
     46  float vacc5x0 = vacc0x0;
     47  float vacc6x0 = vacc0x0;
     48  float vacc7x0 = vacc0x0;
     89  vacc0x0 += vi0 * vw0;
    123  float vout0x0 = math_min_f32(vacc0x0, vmax);
    [all …]
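The f32-spmm kernels reuse the same bias seeding, but the inner loop walks stored nonzeros: each weight *w++ multiplies the current block of input values, and the input pointer then advances by a stored byte offset to the next nonzero column. A sketch under those assumptions; the offset-array name dmap, its int32 byte-diff encoding, and the 4x1 shape are assumptions for illustration, not the exact kernel interface.

    #include <stddef.h>
    #include <stdint.h>

    /* Clamp matching the hit order: min against vmax first, then max
     * against vmin (the math_min_f32 lines above are the first half). */
    static inline float clamp_f32(float v, float vmin, float vmax) {
      v = v < vmax ? v : vmax;
      return v > vmin ? v : vmin;
    }

    /* 4x1 sparse block: one bias seeds all four row accumulators; dmap
     * holds byte offsets between consecutive nonzero input columns
     * (assumed encoding). */
    static void spmm_4x1_sketch(size_t nnz, const float* w, const int32_t* dmap,
                                const float* input, float* out,
                                float vmin, float vmax) {
      float vacc0x0 = *w++;      /* bias, as at line 41 of the hits */
      float vacc1x0 = vacc0x0;
      float vacc2x0 = vacc0x0;
      float vacc3x0 = vacc0x0;
      for (size_t j = 0; j < nnz; j++) {
        const float vw0 = *w++;  /* next stored nonzero weight */
        vacc0x0 += input[0] * vw0;
        vacc1x0 += input[1] * vw0;
        vacc2x0 += input[2] * vw0;
        vacc3x0 += input[3] * vw0;
        input = (const float*) ((uintptr_t) input + (uintptr_t) dmap[j]);
      }
      out[0] = clamp_f32(vacc0x0, vmin, vmax);
      out[1] = clamp_f32(vacc1x0, vmin, vmax);
      out[2] = clamp_f32(vacc2x0, vmin, vmax);
      out[3] = clamp_f32(vacc3x0, vmin, vmax);
    }

The second (local) declaration in the 4x1 and 2x1 entries (lines 130 and 104) is the same pattern repeated in a smaller remainder block of the same function.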
/external/XNNPACK/src/f32-vmulcaddc/gen/ |
D | c2-minmax-wasm-2x.c (hits in xnn_f32_vmulcaddc_minmax_ukernel_c2__wasm_2x()):
     51  float vacc0x0 = i0[0];   (local)
     61  vacc0x0 = vacc0x0 * vscale0 + vbias0;
     66  vacc0x0 = __builtin_wasm_max_f32(vacc0x0, vmin);
     71  vacc0x0 = __builtin_wasm_min_f32(vacc0x0, vmax);
     76  o0[0] = vacc0x0;

D | c2-minmax-scalar-2x.c (hits in xnn_f32_vmulcaddc_minmax_ukernel_c2__scalar_2x()):
     51  float vacc0x0 = i0[0];   (local)
     61  vacc0x0 = vacc0x0 * vscale0 + vbias0;
     66  vacc0x0 = math_max_f32(vacc0x0, vmin);
     71  vacc0x0 = math_min_f32(vacc0x0, vmax);
     76  o0[0] = vacc0x0;

D | c4-minmax-wasm-2x.c (hits in xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x()):
     53  float vacc0x0 = i0[0];   (local)
     69  vacc0x0 = vacc0x0 * vscale0 + vbias0;
     78  vacc0x0 = __builtin_wasm_max_f32(vacc0x0, vmin);
     87  vacc0x0 = __builtin_wasm_min_f32(vacc0x0, vmax);
     96  o0[0] = vacc0x0;

D | c4-minmax-scalar-2x.c (hits in xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x()):
     53  float vacc0x0 = i0[0];   (local)
     69  vacc0x0 = vacc0x0 * vscale0 + vbias0;
     78  vacc0x0 = math_max_f32(vacc0x0, vmin);
     87  vacc0x0 = math_min_f32(vacc0x0, vmax);
     96  o0[0] = vacc0x0;
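The vmulcaddc variants differ only in the clamp primitive: the wasm files call __builtin_wasm_max_f32/__builtin_wasm_min_f32 where the scalar files call math_max_f32/math_min_f32, and here, unlike the ppmm kernels, the max against vmin comes before the min against vmax. A per-element sketch with illustrative names (the real kernels process two rows and 2 or 4 channels per iteration):

    #include <stddef.h>

    /* Per-element multiply-add with clamp: vacc = vacc * scale + bias,
     * then max against vmin, then min against vmax, matching the hit
     * order above. */
    static void vmulcaddc_sketch(size_t n, const float* i0,
                                 const float* scale, const float* bias,
                                 float* o0, float vmin, float vmax) {
      for (size_t c = 0; c < n; c++) {
        float vacc0x0 = i0[c];
        vacc0x0 = vacc0x0 * scale[c] + bias[c];
        vacc0x0 = vacc0x0 > vmin ? vacc0x0 : vmin;  /* max(vacc, vmin) */
        vacc0x0 = vacc0x0 < vmax ? vacc0x0 : vmax;  /* min(vacc, vmax) */
        o0[c] = vacc0x0;
      }
    }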
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 1x4c8-minmax-wasmsimd-ld64.c (hits in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64()):
     45  v128_t vacc0x0 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[0]);   (local)
     59  vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0));
     60  vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0));
     81  …_t vacc0x02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x0, vacc0x2, 0, 4, 1, 5), wasm_v32x4_shuffle(…

D | 1x4c8-minmax-wasmsimd-ld128.c (hits in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128()):
     45  v128_t vacc0x0 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[0]);   (local)
     61  vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0));
     65  vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0));
     85  …_t vacc0x02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x0, vacc0x2, 0, 4, 1, 5), wasm_v32x4_shuffle(…

D | 1x4c8-xw-minmax-wasmsimd.c (hits in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd()):
     45  v128_t vacc0x0 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[0]);   (local)
     59  vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0));
     60  vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0));
     81  …_t vacc0x02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x0, vacc0x2, 0, 4, 1, 5), wasm_v32x4_shuffle(…

D | 2x4c8-xw-minmax-wasmsimd.c (hits in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd()):
     51  v128_t vacc0x0 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[0]);   (local)
     55  v128_t vacc1x0 = vacc0x0;
     71  vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0));
     72  vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0));
    105  …_t vacc0x02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x0, vacc0x2, 0, 4, 1, 5), wasm_v32x4_shuffle(…

D | 2x4c8-minmax-wasmsimd-ld128.c (hits in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128()):
     51  v128_t vacc0x0 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[0]);   (local)
     55  v128_t vacc1x0 = vacc0x0;
     73  vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0));
     79  vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0));
    109  …_t vacc0x02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x0, vacc0x2, 0, 4, 1, 5), wasm_v32x4_shuffle(…

D | 2x4c8-minmax-wasmsimd-ld64.c (hits in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64()):
     51  v128_t vacc0x0 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[0]);   (local)
     55  v128_t vacc1x0 = vacc0x0;
     71  vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0));
     72  vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0));
    105  …_t vacc0x02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x0, vacc0x2, 0, 4, 1, 5), wasm_v32x4_shuffle(…

D | 3x4c8-xw-minmax-wasmsimd.c (hits in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()):
     57  v128_t vacc0x0 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[0]);   (local)
     61  v128_t vacc1x0 = vacc0x0;
     65  v128_t vacc2x0 = vacc0x0;
     83  vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0));
     84  vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0));
    129  …_t vacc0x02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x0, vacc0x2, 0, 4, 1, 5), wasm_v32x4_shuffle(…
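In the qs8 gemm kernels each accumulator update widens an i16x8 partial product into two i32x4 halves and adds both into the column accumulator; the bias itself is loaded by bit-reinterpreting the packed int32 through a float lane insert, as the first hit of each entry shows. A sketch of just the update step, using the intrinsic spellings from the hits (newer wasm_simd128.h headers rename wasm_i32x4_widen_{low,high}_i16x8 to wasm_i32x4_extend_{low,high}_i16x8):

    #include <wasm_simd128.h>

    /* One accumulator update: widen the i16x8 product into two i32x4
     * halves and add both into the column accumulator. */
    static inline v128_t accumulate_widened(v128_t vacc0x0, v128_t vprod0x0) {
      vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0));
      vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0));
      return vacc0x0;
    }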
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 1x4c8-minmax-wasmsimd-ld64.c (hits in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64()):
     48  v128_t vacc0x0 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[0]);   (local)
     70  vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0));
     71  vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0));
     94  …_t vacc0x02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x0, vacc0x2, 0, 4, 1, 5), wasm_v32x4_shuffle(…

D | 1x4c8-minmax-wasmsimd-ld128.c (hits in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128()):
     48  v128_t vacc0x0 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[0]);   (local)
     72  vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0));
     76  vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0));
     98  …_t vacc0x02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x0, vacc0x2, 0, 4, 1, 5), wasm_v32x4_shuffle(…

D | 2x4c8-minmax-wasmsimd-ld128.c (hits in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128()):
     52  v128_t vacc0x0 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[0]);   (local)
     56  v128_t vacc1x0 = vacc0x0;
     86  vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0));
     92  vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0));
    124  …_t vacc0x02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x0, vacc0x2, 0, 4, 1, 5), wasm_v32x4_shuffle(…

D | 2x4c8-minmax-wasmsimd-ld64.c (hits in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64()):
     52  v128_t vacc0x0 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[0]);   (local)
     56  v128_t vacc1x0 = vacc0x0;
     84  vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0));
     85  vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0));
    120  …_t vacc0x02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x0, vacc0x2, 0, 4, 1, 5), wasm_v32x4_shuffle(…

D | 3x4c8-minmax-wasmsimd-ld128.c (hits in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()):
     56  v128_t vacc0x0 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[0]);   (local)
     60  v128_t vacc1x0 = vacc0x0;
     64  v128_t vacc2x0 = vacc0x0;
    100  vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_low_i16x8(vprod0x0));
    108  vacc0x0 = wasm_i32x4_add(vacc0x0, wasm_i32x4_widen_high_i16x8(vprod0x0));
    150  …_t vacc0x02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x0, vacc0x2, 0, 4, 1, 5), wasm_v32x4_shuffle(…
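The truncated line ending each gemm and igemm entry is the start of the accumulator reduction: four per-column accumulators, each holding four partial sums, are folded with interleaving shuffles and adds until one vector carries the four column totals. A sketch reconstructing that reduction; only the vacc0x02 fold is visible above, so the vacc0x13 fold and the final fold are assumptions by symmetry, and newer headers spell wasm_v32x4_shuffle as wasm_i32x4_shuffle.

    #include <wasm_simd128.h>

    /* Fold four per-column i32x4 accumulators into one vector of column
     * sums. The first add mirrors the visible hit; the rest is assumed. */
    static inline v128_t reduce_columns(v128_t vacc0x0, v128_t vacc0x1,
                                        v128_t vacc0x2, v128_t vacc0x3) {
      const v128_t vacc0x02 = wasm_i32x4_add(
          wasm_v32x4_shuffle(vacc0x0, vacc0x2, 0, 4, 1, 5),
          wasm_v32x4_shuffle(vacc0x0, vacc0x2, 2, 6, 3, 7));
      const v128_t vacc0x13 = wasm_i32x4_add(
          wasm_v32x4_shuffle(vacc0x1, vacc0x3, 0, 4, 1, 5),
          wasm_v32x4_shuffle(vacc0x1, vacc0x3, 2, 6, 3, 7));
      /* lanes now interleave column pairs; one more fold yields
         [sum(col0), sum(col1), sum(col2), sum(col3)] */
      return wasm_i32x4_add(
          wasm_v32x4_shuffle(vacc0x02, vacc0x13, 0, 4, 1, 5),
          wasm_v32x4_shuffle(vacc0x02, vacc0x13, 2, 6, 3, 7));
    }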