/external/XNNPACK/src/f32-ppmm/gen/ |
D | 4x2-minmax-scalar.c |
    49   float vacc0x1 = w[1];  in xnn_f32_ppmm_minmax_ukernel_4x2__scalar() local
    51   float vacc1x1 = vacc0x1;  in xnn_f32_ppmm_minmax_ukernel_4x2__scalar()
    53   float vacc2x1 = vacc0x1;  in xnn_f32_ppmm_minmax_ukernel_4x2__scalar()
    55   float vacc3x1 = vacc0x1;  in xnn_f32_ppmm_minmax_ukernel_4x2__scalar()
    74   vacc0x1 += va0 * vb1;  in xnn_f32_ppmm_minmax_ukernel_4x2__scalar()
    87   vacc0x1 = math_min_f32(vacc0x1, vmax);  in xnn_f32_ppmm_minmax_ukernel_4x2__scalar()
    97   vacc0x1 = math_max_f32(vacc0x1, vmin);  in xnn_f32_ppmm_minmax_ukernel_4x2__scalar()
    110  c0[1] = vacc0x1;  in xnn_f32_ppmm_minmax_ukernel_4x2__scalar()
|
D | 3x3-minmax-scalar.c |
    45   float vacc0x1 = w[1];  in xnn_f32_ppmm_minmax_ukernel_3x3__scalar() local
    48   float vacc1x1 = vacc0x1;  in xnn_f32_ppmm_minmax_ukernel_3x3__scalar()
    51   float vacc2x1 = vacc0x1;  in xnn_f32_ppmm_minmax_ukernel_3x3__scalar()
    70   vacc0x1 += va0 * vb1;  in xnn_f32_ppmm_minmax_ukernel_3x3__scalar()
    84   vacc0x1 = math_min_f32(vacc0x1, vmax);  in xnn_f32_ppmm_minmax_ukernel_3x3__scalar()
    95   vacc0x1 = math_max_f32(vacc0x1, vmin);  in xnn_f32_ppmm_minmax_ukernel_3x3__scalar()
    110  c0[1] = vacc0x1;  in xnn_f32_ppmm_minmax_ukernel_3x3__scalar()
    127  c0[1] = vacc0x1;  in xnn_f32_ppmm_minmax_ukernel_3x3__scalar()
|
D | 2x4-minmax-scalar.c |
    41   float vacc0x1 = w[1];  in xnn_f32_ppmm_minmax_ukernel_2x4__scalar() local
    45   float vacc1x1 = vacc0x1;  in xnn_f32_ppmm_minmax_ukernel_2x4__scalar()
    64   vacc0x1 += va0 * vb1;  in xnn_f32_ppmm_minmax_ukernel_2x4__scalar()
    77   vacc0x1 = math_min_f32(vacc0x1, vmax);  in xnn_f32_ppmm_minmax_ukernel_2x4__scalar()
    87   vacc0x1 = math_max_f32(vacc0x1, vmin);  in xnn_f32_ppmm_minmax_ukernel_2x4__scalar()
    100  c0[1] = vacc0x1;  in xnn_f32_ppmm_minmax_ukernel_2x4__scalar()
    115  c0[1] = vacc0x1;  in xnn_f32_ppmm_minmax_ukernel_2x4__scalar()
|
D | 4x4-minmax-scalar.c |
    49   float vacc0x1 = w[1];  in xnn_f32_ppmm_minmax_ukernel_4x4__scalar() local
    53   float vacc1x1 = vacc0x1;  in xnn_f32_ppmm_minmax_ukernel_4x4__scalar()
    57   float vacc2x1 = vacc0x1;  in xnn_f32_ppmm_minmax_ukernel_4x4__scalar()
    61   float vacc3x1 = vacc0x1;  in xnn_f32_ppmm_minmax_ukernel_4x4__scalar()
    84   vacc0x1 += va0 * vb1;  in xnn_f32_ppmm_minmax_ukernel_4x4__scalar()
    105  vacc0x1 = math_min_f32(vacc0x1, vmax);  in xnn_f32_ppmm_minmax_ukernel_4x4__scalar()
    123  vacc0x1 = math_max_f32(vacc0x1, vmin);  in xnn_f32_ppmm_minmax_ukernel_4x4__scalar()
    150  c0[1] = vacc0x1;  in xnn_f32_ppmm_minmax_ukernel_4x4__scalar()
    171  c0[1] = vacc0x1;  in xnn_f32_ppmm_minmax_ukernel_4x4__scalar()
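The occurrences above all trace the same scalar PPMM pattern: each column accumulator is seeded from the packed weight/bias stream w, replicated down the rows of the tile, updated with one multiply-add per reduction step, clamped to [vmin, vmax], and stored to the output row. Below is a minimal 1x2 sketch of that pattern, assuming a contiguous packed layout and substituting fminf/fmaxf for XNNPACK's math_min_f32/math_max_f32 helpers; it is an illustration, not the generated kernel.

#include <stddef.h>
#include <math.h>

/* Illustrative 1x2 reduction of the 4x2/3x3/2x4/4x4 tiles listed above.
 * Assumes w points at [bias0, bias1] followed by kc pairs of weights. */
static void ppmm_1x2_sketch(size_t kc, const float* a, const float* w,
                            float* c0, float vmin, float vmax) {
  float vacc0x0 = w[0];  /* init accumulators from the packed stream */
  float vacc0x1 = w[1];
  w += 2;
  for (size_t k = 0; k < kc; k++) {
    const float va0 = a[k];
    vacc0x0 += va0 * w[0];  /* one multiply-add per reduction step */
    vacc0x1 += va0 * w[1];
    w += 2;
  }
  vacc0x0 = fminf(vacc0x0, vmax);  /* upper clamp */
  vacc0x1 = fminf(vacc0x1, vmax);
  vacc0x0 = fmaxf(vacc0x0, vmin);  /* lower clamp */
  vacc0x1 = fmaxf(vacc0x1, vmin);
  c0[0] = vacc0x0;  /* store the output row */
  c0[1] = vacc0x1;
}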
|
/external/XNNPACK/src/f32-vmulcaddc/gen/ |
D | c2-minmax-wasm-2x.c |
    52  float vacc0x1 = i0[1];  in xnn_f32_vmulcaddc_minmax_ukernel_c2__wasm_2x() local
    62  vacc0x1 = vacc0x1 * vscale1 + vbias1;  in xnn_f32_vmulcaddc_minmax_ukernel_c2__wasm_2x()
    67  vacc0x1 = __builtin_wasm_max_f32(vacc0x1, vmin);  in xnn_f32_vmulcaddc_minmax_ukernel_c2__wasm_2x()
    72  vacc0x1 = __builtin_wasm_min_f32(vacc0x1, vmax);  in xnn_f32_vmulcaddc_minmax_ukernel_c2__wasm_2x()
    77  o0[1] = vacc0x1;  in xnn_f32_vmulcaddc_minmax_ukernel_c2__wasm_2x()
|
D | c2-minmax-scalar-2x.c |
    52  float vacc0x1 = i0[1];  in xnn_f32_vmulcaddc_minmax_ukernel_c2__scalar_2x() local
    62  vacc0x1 = vacc0x1 * vscale1 + vbias1;  in xnn_f32_vmulcaddc_minmax_ukernel_c2__scalar_2x()
    67  vacc0x1 = math_max_f32(vacc0x1, vmin);  in xnn_f32_vmulcaddc_minmax_ukernel_c2__scalar_2x()
    72  vacc0x1 = math_min_f32(vacc0x1, vmax);  in xnn_f32_vmulcaddc_minmax_ukernel_c2__scalar_2x()
    77  o0[1] = vacc0x1;  in xnn_f32_vmulcaddc_minmax_ukernel_c2__scalar_2x()
|
D | c4-minmax-wasm-2x.c |
    54  float vacc0x1 = i0[1];  in xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x() local
    70  vacc0x1 = vacc0x1 * vscale1 + vbias1;  in xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x()
    79  vacc0x1 = __builtin_wasm_max_f32(vacc0x1, vmin);  in xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x()
    88  vacc0x1 = __builtin_wasm_min_f32(vacc0x1, vmax);  in xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x()
    97  o0[1] = vacc0x1;  in xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x()
|
D | c4-minmax-scalar-2x.c |
    54  float vacc0x1 = i0[1];  in xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x() local
    70  vacc0x1 = vacc0x1 * vscale1 + vbias1;  in xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x()
    79  vacc0x1 = math_max_f32(vacc0x1, vmin);  in xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x()
    88  vacc0x1 = math_min_f32(vacc0x1, vmax);  in xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x()
    97  o0[1] = vacc0x1;  in xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x()
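In the vmulcaddc kernels the accumulator is simply the input element scaled and biased per channel, then clamped: vacc = i * scale + bias, followed by a max against vmin and a min against vmax. A hedged single-row sketch follows, assuming separate scale and bias arrays (the real kernels read both from one packed weights block) and using fminf/fmaxf in place of the math_* or __builtin_wasm_* clamps; illustration only.

#include <stddef.h>
#include <math.h>

/* Per-channel multiply-add-clamp for one row. */
static void vmulcaddc_row_sketch(size_t channels, const float* i0,
                                 const float* scale, const float* bias,
                                 float* o0, float vmin, float vmax) {
  for (size_t c = 0; c < channels; c++) {
    float vacc = i0[c] * scale[c] + bias[c];  /* fused scale + bias */
    vacc = fmaxf(vacc, vmin);                 /* lower clamp */
    vacc = fminf(vacc, vmax);                 /* upper clamp */
    o0[c] = vacc;                             /* store */
  }
}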
|
/external/XNNPACK/src/f32-spmm/gen/ |
D | 8x2-minmax-scalar.c |
    49  float vacc0x1 = *w++;  in xnn_f32_spmm_minmax_ukernel_8x2__scalar() local
    50  float vacc1x1 = vacc0x1;  in xnn_f32_spmm_minmax_ukernel_8x2__scalar()
    51  float vacc2x1 = vacc0x1;  in xnn_f32_spmm_minmax_ukernel_8x2__scalar()
    52  float vacc3x1 = vacc0x1;  in xnn_f32_spmm_minmax_ukernel_8x2__scalar()
    53  float vacc4x1 = vacc0x1;  in xnn_f32_spmm_minmax_ukernel_8x2__scalar()
    54  float vacc5x1 = vacc0x1;  in xnn_f32_spmm_minmax_ukernel_8x2__scalar()
    55  float vacc6x1 = vacc0x1;  in xnn_f32_spmm_minmax_ukernel_8x2__scalar()
    56  float vacc7x1 = vacc0x1;  in xnn_f32_spmm_minmax_ukernel_8x2__scalar()
    79  vacc0x1 += vi0 * vw1;  in xnn_f32_spmm_minmax_ukernel_8x2__scalar()
    97  float vout0x1 = math_min_f32(vacc0x1, vmax);  in xnn_f32_spmm_minmax_ukernel_8x2__scalar()
    [all …]
|
D | 8x4-minmax-scalar.c |
    49   float vacc0x1 = *w++;  in xnn_f32_spmm_minmax_ukernel_8x4__scalar() local
    50   float vacc1x1 = vacc0x1;  in xnn_f32_spmm_minmax_ukernel_8x4__scalar()
    51   float vacc2x1 = vacc0x1;  in xnn_f32_spmm_minmax_ukernel_8x4__scalar()
    52   float vacc3x1 = vacc0x1;  in xnn_f32_spmm_minmax_ukernel_8x4__scalar()
    53   float vacc4x1 = vacc0x1;  in xnn_f32_spmm_minmax_ukernel_8x4__scalar()
    54   float vacc5x1 = vacc0x1;  in xnn_f32_spmm_minmax_ukernel_8x4__scalar()
    55   float vacc6x1 = vacc0x1;  in xnn_f32_spmm_minmax_ukernel_8x4__scalar()
    56   float vacc7x1 = vacc0x1;  in xnn_f32_spmm_minmax_ukernel_8x4__scalar()
    97   vacc0x1 += vi0 * vw1;  in xnn_f32_spmm_minmax_ukernel_8x4__scalar()
    131  float vout0x1 = math_min_f32(vacc0x1, vmax);  in xnn_f32_spmm_minmax_ukernel_8x4__scalar()
    [all …]
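The SPMM entries show the sparse variant of the same accumulation: one bias is read with *w++ and broadcast to all eight row accumulators, and each stored nonzero weight then updates every row before the input pointer advances by a precomputed byte offset. The sketch below illustrates that structure under those assumptions; the dmap/nnz names mirror XNNPACK's convention but the code is only an illustration, and only row 0 is clamped and stored for brevity.

#include <stddef.h>
#include <stdint.h>
#include <math.h>

/* Eight-row, one-output-channel sketch of the sparse accumulation. */
static void spmm_8x1_sketch(size_t nnz, const float* w, const int32_t* dmap,
                            const float* input, float* out,
                            float vmin, float vmax) {
  float vacc0 = *w++;  /* bias read once from the packed stream */
  float vacc1 = vacc0, vacc2 = vacc0, vacc3 = vacc0;  /* broadcast to rows */
  float vacc4 = vacc0, vacc5 = vacc0, vacc6 = vacc0, vacc7 = vacc0;
  while (nnz-- != 0) {
    const float vw = *w++;   /* next nonzero weight */
    vacc0 += input[0] * vw;
    vacc1 += input[1] * vw;
    vacc2 += input[2] * vw;
    vacc3 += input[3] * vw;
    vacc4 += input[4] * vw;
    vacc5 += input[5] * vw;
    vacc6 += input[6] * vw;
    vacc7 += input[7] * vw;
    /* advance the input pointer by the stored byte offset to the next nonzero column */
    input = (const float*) ((const char*) input + *dmap++);
  }
  out[0] = fmaxf(fminf(vacc0, vmax), vmin);  /* clamp and store row 0 */
  /* ... rows 1-7 are clamped and stored the same way ... */
}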
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 1x4c8-minmax-wasmsimd-ld64.c |
    46  v128_t vacc0x1 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[1]);  in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64() local
    64  vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1));  in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64()
    65  vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1));  in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64()
    82  …_t vacc0x13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x1, vacc0x3, 0, 4, 1, 5), wasm_v32x4_shuffle(…  in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64()
|
D | 1x4c8-minmax-wasmsimd-ld128.c |
    46  v128_t vacc0x1 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[1]);  in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128() local
    64  vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1));  in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
    67  vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1));  in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
    86  …_t vacc0x13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x1, vacc0x3, 0, 4, 1, 5), wasm_v32x4_shuffle(…  in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
|
D | 1x4c8-xw-minmax-wasmsimd.c |
    46  v128_t vacc0x1 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[1]);  in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd() local
    64  vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1));  in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd()
    65  vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1));  in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd()
    82  …_t vacc0x13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x1, vacc0x3, 0, 4, 1, 5), wasm_v32x4_shuffle(…  in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd()
|
D | 2x4c8-xw-minmax-wasmsimd.c |
    52   v128_t vacc0x1 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[1]);  in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd() local
    56   v128_t vacc1x1 = vacc0x1;  in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd()
    79   vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1));  in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd()
    80   vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1));  in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd()
    106  …_t vacc0x13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x1, vacc0x3, 0, 4, 1, 5), wasm_v32x4_shuffle(…  in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd()
|
D | 2x4c8-minmax-wasmsimd-ld128.c |
    52   v128_t vacc0x1 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[1]);  in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128() local
    56   v128_t vacc1x1 = vacc0x1;  in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
    78   vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1));  in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
    84   vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1));  in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
    110  …_t vacc0x13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x1, vacc0x3, 0, 4, 1, 5), wasm_v32x4_shuffle(…  in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
|
D | 2x4c8-minmax-wasmsimd-ld64.c |
    52   v128_t vacc0x1 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[1]);  in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64() local
    56   v128_t vacc1x1 = vacc0x1;  in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
    79   vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1));  in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
    80   vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1));  in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
    106  …_t vacc0x13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x1, vacc0x3, 0, 4, 1, 5), wasm_v32x4_shuffle(…  in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
|
D | 3x4c8-xw-minmax-wasmsimd.c |
    58   v128_t vacc0x1 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[1]);  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd() local
    62   v128_t vacc1x1 = vacc0x1;  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()
    66   v128_t vacc2x1 = vacc0x1;  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()
    94   vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1));  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()
    95   vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1));  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()
    130  …_t vacc0x13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x1, vacc0x3, 0, 4, 1, 5), wasm_v32x4_shuffle(…  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()
|
D | 3x4c8-minmax-wasmsimd-ld128.c |
    58   v128_t vacc0x1 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[1]);  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128() local
    62   v128_t vacc1x1 = vacc0x1;  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
    66   v128_t vacc2x1 = vacc0x1;  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
    92   vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1));  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
    101  vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1));  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
    134  …_t vacc0x13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x1, vacc0x3, 0, 4, 1, 5), wasm_v32x4_shuffle(…  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
|
D | 3x4c8-minmax-wasmsimd-ld64.c |
    58   v128_t vacc0x1 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[1]);  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64() local
    62   v128_t vacc1x1 = vacc0x1;  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
    66   v128_t vacc2x1 = vacc0x1;  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
    94   vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1));  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
    95   vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1));  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
    130  …_t vacc0x13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x1, vacc0x3, 0, 4, 1, 5), wasm_v32x4_shuffle(…  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
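Across the qs8 GEMM variants the vacc0x1 lines follow one widening-accumulate idiom: the per-column accumulator starts as a zero vector with the 32-bit bias word inserted into lane 0 (the wasm_f32x4_replace_lane on ((const float*) w)[1] is just a lane insert of that word), and every int16x8 product of eight signed 8-bit inputs and weights is split into low and high halves, widened to int32, and added in. Below is a sketch of the accumulate step using the intrinsic spellings that appear in these generated sources (newer wasm_simd128.h headers name the widening ops wasm_i32x4_extend_*); illustration only, not the XNNPACK ukernel.

#include <wasm_simd128.h>

/* One column's widening-accumulate step; va and vb each hold eight
 * sign-extended int8 values already promoted to i16x8 lanes. */
static v128_t qs8_accumulate_column(v128_t vacc, v128_t va, v128_t vb) {
  const v128_t vprod = wasm_i16x8_mul(va, vb);                       /* 8 x int16 products */
  vacc = wasm_i32x4_add(vacc, wasm_i32x4_widen_low_i16x8(vprod));    /* low half -> int32 */
  vacc = wasm_i32x4_add(vacc, wasm_i32x4_widen_high_i16x8(vprod));   /* high half -> int32 */
  return vacc;
}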
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 1x4c8-minmax-wasmsimd-ld64.c |
    49  v128_t vacc0x1 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[1]);  in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64() local
    75  vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1));  in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64()
    76  vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1));  in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64()
    95  …_t vacc0x13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x1, vacc0x3, 0, 4, 1, 5), wasm_v32x4_shuffle(…  in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64()
|
D | 1x4c8-minmax-wasmsimd-ld128.c |
    49  v128_t vacc0x1 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[1]);  in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128() local
    75  vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1));  in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
    78  vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1));  in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
    99  …_t vacc0x13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x1, vacc0x3, 0, 4, 1, 5), wasm_v32x4_shuffle(…  in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
|
D | 2x4c8-minmax-wasmsimd-ld128.c |
    53   v128_t vacc0x1 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[1]);  in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128() local
    57   v128_t vacc1x1 = vacc0x1;  in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
    91   vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1));  in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
    97   vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1));  in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
    125  …_t vacc0x13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x1, vacc0x3, 0, 4, 1, 5), wasm_v32x4_shuffle(…  in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
|
D | 2x4c8-minmax-wasmsimd-ld64.c |
    53   v128_t vacc0x1 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[1]);  in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64() local
    57   v128_t vacc1x1 = vacc0x1;  in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
    92   vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1));  in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
    93   vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1));  in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
    121  …_t vacc0x13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x1, vacc0x3, 0, 4, 1, 5), wasm_v32x4_shuffle(…  in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
|
D | 3x4c8-minmax-wasmsimd-ld128.c |
    57   v128_t vacc0x1 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[1]);  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128() local
    61   v128_t vacc1x1 = vacc0x1;  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
    65   v128_t vacc2x1 = vacc0x1;  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
    107  vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1));  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
    116  vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1));  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
    151  …_t vacc0x13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x1, vacc0x3, 0, 4, 1, 5), wasm_v32x4_shuffle(…  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
|
D | 3x4c8-minmax-wasmsimd-ld64.c |
    57   v128_t vacc0x1 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[1]);  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64() local
    61   v128_t vacc1x1 = vacc0x1;  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
    65   v128_t vacc2x1 = vacc0x1;  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
    109  vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_low_i16x8(vprod0x1));  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
    110  vacc0x1 = wasm_i32x4_add(vacc0x1, wasm_i32x4_widen_high_i16x8(vprod0x1));  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
    147  …_t vacc0x13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x1, vacc0x3, 0, 4, 1, 5), wasm_v32x4_shuffle(…  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
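The truncated vacc0x13 lines in both the GEMM and IGEMM listings belong to the final horizontal reduction: after the k loop each of the four per-column accumulators holds partial sums spread across its four lanes, and pairwise interleave-and-add shuffles fold them into a single i32x4 holding the four column totals for the row. The sketch below illustrates that reduction pattern (the surrounding requantization and clamping are omitted); it is an illustration, not the generated kernel.

#include <wasm_simd128.h>

/* Fold four per-column accumulators into one vector of column sums. */
static v128_t qs8_reduce_columns(v128_t vacc0x0, v128_t vacc0x1,
                                 v128_t vacc0x2, v128_t vacc0x3) {
  /* Interleave-and-add columns 0/2 and 1/3, then combine the two partials. */
  const v128_t vacc0x02 = wasm_i32x4_add(
      wasm_v32x4_shuffle(vacc0x0, vacc0x2, 0, 4, 1, 5),
      wasm_v32x4_shuffle(vacc0x0, vacc0x2, 2, 6, 3, 7));
  const v128_t vacc0x13 = wasm_i32x4_add(
      wasm_v32x4_shuffle(vacc0x1, vacc0x3, 0, 4, 1, 5),
      wasm_v32x4_shuffle(vacc0x1, vacc0x3, 2, 6, 3, 7));
  /* Final interleave-and-add leaves column sums 0..3 in lanes 0..3. */
  return wasm_i32x4_add(
      wasm_v32x4_shuffle(vacc0x02, vacc0x13, 0, 4, 1, 5),
      wasm_v32x4_shuffle(vacc0x02, vacc0x13, 2, 6, 3, 7));
}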
|