/external/XNNPACK/src/f32-ppmm/gen/ |
D | 3x3-minmax-scalar.c |
     46  float vacc0x2 = w[2];  in xnn_f32_ppmm_minmax_ukernel_3x3__scalar() local
     49  float vacc1x2 = vacc0x2;  in xnn_f32_ppmm_minmax_ukernel_3x3__scalar()
     52  float vacc2x2 = vacc0x2;  in xnn_f32_ppmm_minmax_ukernel_3x3__scalar()
     73  vacc0x2 += va0 * vb2;  in xnn_f32_ppmm_minmax_ukernel_3x3__scalar()
     87  vacc0x2 = math_min_f32(vacc0x2, vmax);  in xnn_f32_ppmm_minmax_ukernel_3x3__scalar()
     98  vacc0x2 = math_max_f32(vacc0x2, vmin);  in xnn_f32_ppmm_minmax_ukernel_3x3__scalar()
    111  c0[2] = vacc0x2;  in xnn_f32_ppmm_minmax_ukernel_3x3__scalar()
    131  vacc0x0 = vacc0x2;  in xnn_f32_ppmm_minmax_ukernel_3x3__scalar()
|
D | 2x4-minmax-scalar.c |
     42  float vacc0x2 = w[2];  in xnn_f32_ppmm_minmax_ukernel_2x4__scalar() local
     46  float vacc1x2 = vacc0x2;  in xnn_f32_ppmm_minmax_ukernel_2x4__scalar()
     66  vacc0x2 += va0 * vb2;  in xnn_f32_ppmm_minmax_ukernel_2x4__scalar()
     79  vacc0x2 = math_min_f32(vacc0x2, vmax);  in xnn_f32_ppmm_minmax_ukernel_2x4__scalar()
     89  vacc0x2 = math_max_f32(vacc0x2, vmin);  in xnn_f32_ppmm_minmax_ukernel_2x4__scalar()
    101  c0[2] = vacc0x2;  in xnn_f32_ppmm_minmax_ukernel_2x4__scalar()
    118  vacc0x0 = vacc0x2;  in xnn_f32_ppmm_minmax_ukernel_2x4__scalar()
|
D | 4x4-minmax-scalar.c |
     50  float vacc0x2 = w[2];  in xnn_f32_ppmm_minmax_ukernel_4x4__scalar() local
     54  float vacc1x2 = vacc0x2;  in xnn_f32_ppmm_minmax_ukernel_4x4__scalar()
     58  float vacc2x2 = vacc0x2;  in xnn_f32_ppmm_minmax_ukernel_4x4__scalar()
     62  float vacc3x2 = vacc0x2;  in xnn_f32_ppmm_minmax_ukernel_4x4__scalar()
     88  vacc0x2 += va0 * vb2;  in xnn_f32_ppmm_minmax_ukernel_4x4__scalar()
    109  vacc0x2 = math_min_f32(vacc0x2, vmax);  in xnn_f32_ppmm_minmax_ukernel_4x4__scalar()
    127  vacc0x2 = math_max_f32(vacc0x2, vmin);  in xnn_f32_ppmm_minmax_ukernel_4x4__scalar()
    151  c0[2] = vacc0x2;  in xnn_f32_ppmm_minmax_ukernel_4x4__scalar()
    176  vacc0x0 = vacc0x2;  in xnn_f32_ppmm_minmax_ukernel_4x4__scalar()
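The three scalar ppmm entries above share one pattern: each accumulator in the row tile is seeded from the packed weights (w[2] for column 2), updated with va0 * vb2 across the reduction, clamped with math_min_f32/math_max_f32, and finally stored to c0[2]. A minimal stand-alone sketch of that pattern for a hypothetical 1x2 tile follows; the helper names and the simplified weight packing are assumptions for illustration, not the generated XNNPACK source.

#include <stddef.h>

/* Simplified stand-ins for XNNPACK's math_min_f32/math_max_f32 helpers. */
static inline float min_f32(float a, float b) { return a < b ? a : b; }
static inline float max_f32(float a, float b) { return a > b ? a : b; }

/* Hypothetical 1x2 tile mirroring the ppmm pattern above: accumulators start
 * from the packed weights (the bias), accumulate va0 * vbN over k, are
 * clamped to [vmin, vmax], and are written out to c0[]. */
static void ppmm_like_1x2(size_t kc, const float* a, const float* w,
                          float* c0, float vmin, float vmax) {
  float vacc0x0 = w[0];  /* like "float vacc0x2 = w[2];" for column 2 */
  float vacc0x1 = w[1];
  w += 2;
  for (size_t k = 0; k < kc; k++) {
    const float va0 = a[k];
    const float vb0 = w[2 * k + 0];
    const float vb1 = w[2 * k + 1];
    vacc0x0 += va0 * vb0;  /* like "vacc0x2 += va0 * vb2;" */
    vacc0x1 += va0 * vb1;
  }
  vacc0x0 = min_f32(vacc0x0, vmax);  /* like math_min_f32(vacc0x2, vmax) */
  vacc0x1 = min_f32(vacc0x1, vmax);
  vacc0x0 = max_f32(vacc0x0, vmin);  /* like math_max_f32(vacc0x2, vmin) */
  vacc0x1 = max_f32(vacc0x1, vmin);
  c0[0] = vacc0x0;  /* like "c0[2] = vacc0x2;" */
  c0[1] = vacc0x1;
}

The real kernels unroll this over 2 to 4 rows and 3 or 4 columns, which is why the copies vacc1x2, vacc2x2, vacc3x2 of the column-2 accumulator appear in the entries above.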
|
/external/XNNPACK/src/f32-vmulcaddc/gen/ |
D | c4-minmax-wasm-2x.c |
     55  float vacc0x2 = i0[2];  in xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x() local
     71  vacc0x2 = vacc0x2 * vscale2 + vbias2;  in xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x()
     80  vacc0x2 = __builtin_wasm_max_f32(vacc0x2, vmin);  in xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x()
     89  vacc0x2 = __builtin_wasm_min_f32(vacc0x2, vmax);  in xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x()
     98  o0[2] = vacc0x2;  in xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x()
|
D | c4-minmax-scalar-2x.c |
     55  float vacc0x2 = i0[2];  in xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x() local
     71  vacc0x2 = vacc0x2 * vscale2 + vbias2;  in xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x()
     80  vacc0x2 = math_max_f32(vacc0x2, vmin);  in xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x()
     89  vacc0x2 = math_min_f32(vacc0x2, vmax);  in xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x()
     98  o0[2] = vacc0x2;  in xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x()
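Both vmulcaddc entries read an input element straight into the accumulator, apply a per-channel scale and bias (vacc0x2 * vscale2 + vbias2), clamp to [vmin, vmax], and write the result; the only difference between the two files is whether the clamp uses __builtin_wasm_max_f32/__builtin_wasm_min_f32 or the scalar math_max_f32/math_min_f32 helpers. A scalar sketch of that per-channel computation, with illustrative (non-XNNPACK) parameter names:

#include <stddef.h>

/* Scalar sketch of the vmulcaddc pattern above: vacc = x * scale + bias,
 * then clamp to [vmin, vmax]. One row of the 2x kernel, names illustrative. */
static void vmulcaddc_like(size_t channels, const float* i0, float* o0,
                           const float* scale, const float* bias,
                           float vmin, float vmax) {
  for (size_t c = 0; c < channels; c++) {
    float vacc = i0[c];                  /* like "float vacc0x2 = i0[2];" */
    vacc = vacc * scale[c] + bias[c];    /* like "vacc0x2 * vscale2 + vbias2" */
    vacc = vacc > vmin ? vacc : vmin;    /* like math_max_f32(vacc0x2, vmin) */
    vacc = vacc < vmax ? vacc : vmax;    /* like math_min_f32(vacc0x2, vmax) */
    o0[c] = vacc;
  }
}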
|
/external/XNNPACK/src/f32-spmm/gen/ |
D | 8x4-minmax-scalar.c |
     57  float vacc0x2 = *w++;  in xnn_f32_spmm_minmax_ukernel_8x4__scalar() local
     58  float vacc1x2 = vacc0x2;  in xnn_f32_spmm_minmax_ukernel_8x4__scalar()
     59  float vacc2x2 = vacc0x2;  in xnn_f32_spmm_minmax_ukernel_8x4__scalar()
     60  float vacc3x2 = vacc0x2;  in xnn_f32_spmm_minmax_ukernel_8x4__scalar()
     61  float vacc4x2 = vacc0x2;  in xnn_f32_spmm_minmax_ukernel_8x4__scalar()
     62  float vacc5x2 = vacc0x2;  in xnn_f32_spmm_minmax_ukernel_8x4__scalar()
     63  float vacc6x2 = vacc0x2;  in xnn_f32_spmm_minmax_ukernel_8x4__scalar()
     64  float vacc7x2 = vacc0x2;  in xnn_f32_spmm_minmax_ukernel_8x4__scalar()
    105  vacc0x2 += vi0 * vw2;  in xnn_f32_spmm_minmax_ukernel_8x4__scalar()
    139  float vout0x2 = math_min_f32(vacc0x2, vmax);  in xnn_f32_spmm_minmax_ukernel_8x4__scalar()
    [all …]
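In the spmm kernel, a single bias read (*w++) seeds the accumulators of all eight rows being processed, each stored nonzero weight contributes vi * vw to every row's accumulator, and the clamped results become vout. A deliberately simplified scalar sketch of that sparse accumulation follows; the (nnz, widx[]) nonzero representation is an assumption for illustration only, as the generated XNNPACK kernel walks a difference-coded index stream instead.

#include <stddef.h>
#include <stdint.h>

/* Much-simplified scalar sketch of the spmm accumulation above, for m batch
 * rows and one output channel: the bias seeds every row's accumulator, each
 * stored nonzero weight multiplies the matching input element of every row,
 * and the results are clamped to [vmin, vmax]. */
static void spmm_like_one_channel(size_t m, size_t nnz,
                                  const float* input, size_t input_stride,
                                  const float* w,       /* packed: bias, then nnz weights */
                                  const int32_t* widx,  /* input index of each nonzero (illustrative) */
                                  float* output, float vmin, float vmax) {
  const float bias = *w++;              /* like "float vacc0x2 = *w++;" */
  for (size_t i = 0; i < m; i++) {
    float vacc = bias;                  /* like vacc1x2 = vacc0x2; ... vacc7x2 = vacc0x2; */
    for (size_t k = 0; k < nnz; k++) {
      const float vw = w[k];
      const float vi = input[i * input_stride + (size_t) widx[k]];
      vacc += vi * vw;                  /* like "vacc0x2 += vi0 * vw2;" */
    }
    float vout = vacc < vmax ? vacc : vmax;   /* like math_min_f32(vacc0x2, vmax) */
    vout = vout > vmin ? vout : vmin;
    output[i] = vout;
  }
}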
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 1x4c8-minmax-wasmsimd-ld64.c |
     47  v128_t vacc0x2 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[2]);  in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64() local
     69  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2));  in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64()
     70  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2));  in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64()
     81  …02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x0, vacc0x2, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc0x0, …  in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64()
|
D | 1x4c8-minmax-wasmsimd-ld128.c |
     47  v128_t vacc0x2 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[2]);  in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128() local
     73  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2));  in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
     77  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2));  in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
     85  …02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x0, vacc0x2, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc0x0, …  in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
|
D | 1x4c8-xw-minmax-wasmsimd.c |
     47  v128_t vacc0x2 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[2]);  in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd() local
     69  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2));  in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd()
     70  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2));  in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd()
     81  …02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x0, vacc0x2, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc0x0, …  in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd()
|
D | 2x4c8-xw-minmax-wasmsimd.c |
     53  v128_t vacc0x2 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[2]);  in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd() local
     57  v128_t vacc1x2 = vacc0x2;  in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd()
     87  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2));  in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd()
     88  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2));  in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd()
    105  …02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x0, vacc0x2, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc0x0, …  in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd()
|
D | 2x4c8-minmax-wasmsimd-ld128.c |
     53  v128_t vacc0x2 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[2]);  in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128() local
     57  v128_t vacc1x2 = vacc0x2;  in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
     91  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2));  in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
     97  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2));  in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
    109  …02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x0, vacc0x2, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc0x0, …  in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
|
D | 2x4c8-minmax-wasmsimd-ld64.c |
     53  v128_t vacc0x2 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[2]);  in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64() local
     57  v128_t vacc1x2 = vacc0x2;  in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
     87  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2));  in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
     88  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2));  in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
    105  …02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x0, vacc0x2, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc0x0, …  in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
|
D | 3x4c8-xw-minmax-wasmsimd.c |
     59  v128_t vacc0x2 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[2]);  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd() local
     63  v128_t vacc1x2 = vacc0x2;  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()
     67  v128_t vacc2x2 = vacc0x2;  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()
    105  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2));  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()
    106  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2));  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()
    129  …02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x0, vacc0x2, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc0x0, …  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()
|
D | 3x4c8-minmax-wasmsimd-ld128.c |
     59  v128_t vacc0x2 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[2]);  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128() local
     63  v128_t vacc1x2 = vacc0x2;  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
     67  v128_t vacc2x2 = vacc0x2;  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
    109  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2));  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
    117  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2));  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
    133  …02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x0, vacc0x2, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc0x0, …  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
|
D | 3x4c8-minmax-wasmsimd-ld64.c |
     59  v128_t vacc0x2 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[2]);  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64() local
     63  v128_t vacc1x2 = vacc0x2;  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
     67  v128_t vacc2x2 = vacc0x2;  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
    105  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2));  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
    106  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2));  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
    129  …02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x0, vacc0x2, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc0x0, …  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
|
D | 1x8c8-minmax-neon-mlal-padal.c |
     46  …int32x4_t vacc0x2 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo…  in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal() local
     79  vacc0x2 = vpadalq_s16(vacc0x2, vprod0x2);  in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal()
    116  vacc0x2 = vpadalq_s16(vacc0x2, vprod0x2);  in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal()
    138  const int32x4_t vsum0x23 = vpaddq_s32(vacc0x2, vacc0x3);  in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal()
    146  const int32x2_t vpsum0x2 = vadd_s32(vget_low_s32(vacc0x2), vget_high_s32(vacc0x2));  in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal()
|
D | 1x4c8-xw-minmax-sse2.c |
     46  __m128i vacc0x2 = _mm_cvtsi32_si128((int) ((const int32_t*) w)[2]);  in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse2() local
     64  vacc0x2 = _mm_add_epi32(vacc0x2, _mm_madd_epi16(vxa0, vxb2));  in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse2()
     73  … vacc0x02 = _mm_add_epi32(_mm_unpacklo_epi32(vacc0x0, vacc0x2), _mm_unpackhi_epi32(vacc0x0, vacc0x…  in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse2()
|
D | 1x4c8-minmax-sse2-ld128.c |
     46  __m128i vacc0x2 = _mm_cvtsi32_si128((int) ((const int32_t*) w)[2]);  in xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld128() local
     68  vacc0x2 = _mm_add_epi32(vacc0x2, _mm_madd_epi16(vxa0, vxb2));  in xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld128()
     75  … vacc0x02 = _mm_add_epi32(_mm_unpacklo_epi32(vacc0x0, vacc0x2), _mm_unpackhi_epi32(vacc0x0, vacc0x…  in xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld128()
|
D | 1x4c8-minmax-sse2-ld64.c |
     46  __m128i vacc0x2 = _mm_cvtsi32_si128((int) ((const int32_t*) w)[2]);  in xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld64() local
     67  vacc0x2 = _mm_add_epi32(vacc0x2, _mm_madd_epi16(vxa0, vxb2));  in xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld64()
     77  … vacc0x02 = _mm_add_epi32(_mm_unpacklo_epi32(vacc0x0, vacc0x2), _mm_unpackhi_epi32(vacc0x0, vacc0x…  in xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld64()
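Every qs8 GEMM variant above keeps 32-bit accumulators that start from the packed per-channel bias and absorb widened int8 x int8 products: the WAsm SIMD kernels widen the two halves of a 16-bit product vector (wasm_i32x4_widen_low_i16x8 / wasm_i32x4_widen_high_i16x8), the SSE2 kernels use _mm_madd_epi16, and the NEON kernel uses vpadalq_s16, but per lane the arithmetic is the same. A scalar sketch of that widening multiply-accumulate for one output element over a kc-length reduction follows; requantization and min/max clamping, which come afterwards in the real kernels, are omitted.

#include <stddef.h>
#include <stdint.h>

/* Scalar illustration of the widening multiply-accumulate the qs8 GEMM
 * kernels above perform per SIMD lane: signed 8-bit inputs are multiplied
 * into 16-bit products and summed into a 32-bit accumulator that starts
 * from the packed bias. */
static int32_t qs8_dot(int32_t bias, const int8_t* a, const int8_t* b, size_t kc) {
  int32_t vacc = bias;  /* like loading ((const int32_t*) w)[2] into vacc0x2 */
  for (size_t k = 0; k < kc; k++) {
    const int16_t vprod = (int16_t) ((int16_t) a[k] * (int16_t) b[k]);  /* 8x8 -> 16-bit product */
    vacc += (int32_t) vprod;  /* widen to 32 bits and accumulate */
  }
  return vacc;  /* still needs requantization and clamping afterwards */
}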
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 1x4c8-minmax-wasmsimd-ld64.c |
     50  v128_t vacc0x2 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[2]);  in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64() local
     80  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2));  in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64()
     81  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2));  in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64()
     94  …02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x0, vacc0x2, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc0x0, …  in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64()
|
D | 1x4c8-minmax-wasmsimd-ld128.c |
     50  v128_t vacc0x2 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[2]);  in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128() local
     84  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2));  in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
     88  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2));  in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
     98  …02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x0, vacc0x2, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc0x0, …  in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128()
|
D | 2x4c8-minmax-wasmsimd-ld128.c |
     54  v128_t vacc0x2 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[2]);  in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128() local
     58  v128_t vacc1x2 = vacc0x2;  in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
    104  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2));  in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
    110  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2));  in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
    124  …02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x0, vacc0x2, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc0x0, …  in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
|
D | 2x4c8-minmax-wasmsimd-ld64.c |
     54  v128_t vacc0x2 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[2]);  in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64() local
     58  v128_t vacc1x2 = vacc0x2;  in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
    100  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2));  in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
    101  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2));  in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
    120  …02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x0, vacc0x2, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc0x0, …  in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
|
D | 3x4c8-minmax-wasmsimd-ld128.c |
     58  v128_t vacc0x2 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[2]);  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128() local
     62  v128_t vacc1x2 = vacc0x2;  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
     66  v128_t vacc2x2 = vacc0x2;  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
    124  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2));  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
    132  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2));  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
    150  …02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x0, vacc0x2, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc0x0, …  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
|
D | 3x4c8-minmax-wasmsimd-ld64.c |
     58  v128_t vacc0x2 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[2]);  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64() local
     62  v128_t vacc1x2 = vacc0x2;  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
     66  v128_t vacc2x2 = vacc0x2;  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
    120  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_low_i16x8(vprod0x2));  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
    121  vacc0x2 = wasm_i32x4_add(vacc0x2, wasm_i32x4_widen_high_i16x8(vprod0x2));  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
    146  …02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x0, vacc0x2, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc0x0, …  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
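The qs8 igemm entries accumulate exactly like their gemm counterparts; what changes is that the activation rows are fetched through an indirection buffer of row pointers (one per kernel tap) rather than from a dense, strided matrix. A scalar sketch of that indirect variant, with illustrative parameter names:

#include <stddef.h>
#include <stdint.h>

/* Scalar sketch of the indirect (igemm) accumulation: the activation rows
 * come through an array of pointers instead of a contiguous matrix, then
 * feed the same widening int8 multiply-accumulate as the gemm sketch above. */
static int32_t qs8_igemm_dot(int32_t bias,
                             const int8_t* const* indirect_a,  /* ks pointers to kc-byte rows */
                             size_t ks, size_t kc,
                             const int8_t* b) {                /* packed weights, ks*kc bytes */
  int32_t vacc = bias;
  for (size_t s = 0; s < ks; s++) {
    const int8_t* a = indirect_a[s];   /* indirect load of one activation row */
    for (size_t k = 0; k < kc; k++) {
      vacc += (int32_t) ((int16_t) a[k] * (int16_t) b[k]);
    }
    b += kc;
  }
  return vacc;  /* requantization and clamping follow, as in the gemm case */
}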
|