/external/XNNPACK/src/f32-ppmm/gen/

  2x4-minmax-scalar.c  (all hits in xnn_f32_ppmm_minmax_ukernel_2x4__scalar)
      43:  float vacc0x3 = w[3];                      (local declaration)
      47:  float vacc1x3 = vacc0x3;
      68:  vacc0x3 += va0 * vb3;
      81:  vacc0x3 = math_min_f32(vacc0x3, vmax);
      91:  vacc0x3 = math_max_f32(vacc0x3, vmin);
     102:  c0[3] = vacc0x3;

  4x4-minmax-scalar.c  (all hits in xnn_f32_ppmm_minmax_ukernel_4x4__scalar)
      51:  float vacc0x3 = w[3];                      (local declaration)
      55:  float vacc1x3 = vacc0x3;
      59:  float vacc2x3 = vacc0x3;
      63:  float vacc3x3 = vacc0x3;
      92:  vacc0x3 += va0 * vb3;
     113:  vacc0x3 = math_min_f32(vacc0x3, vmax);
     131:  vacc0x3 = math_max_f32(vacc0x3, vmin);
     152:  c0[3] = vacc0x3;
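
Taken together, the hits above trace one accumulator lane through the scalar PPMM micro-kernels: it is seeded from the packed weights, the remaining rows copy that seed, each reduction step adds an a*b product, the result is clamped against vmax and then vmin, and finally stored. Below is a minimal C sketch of that lifecycle for the 2x4 kernel's channel-3 lane; it is paraphrased from the snippets only, so the function name, the loop structure, the pointer stepping and the min_f32/max_f32 helpers are illustrative stand-ins, not the file's actual code.

    /* Sketch only: mirrors the vacc0x3 lifecycle shown for 2x4-minmax-scalar.c.
       Lanes x0..x2 and the real pointer/size bookkeeping are omitted. */
    #include <stddef.h>

    static float min_f32(float a, float b) { return a < b ? a : b; }  /* stands in for math_min_f32 */
    static float max_f32(float a, float b) { return a > b ? a : b; }  /* stands in for math_max_f32 */

    void ppmm_2x4_lane3_sketch(size_t kc, const float* w, const float* a,
                               float* c0, float* c1, float vmin, float vmax)
    {
      float vacc0x3 = w[3];      /* line 43: seed from packed weights (bias) */
      float vacc1x3 = vacc0x3;   /* line 47: row 1 starts from the same seed */
      w += 4;
      for (size_t k = 0; k < kc; k++) {
        const float va0 = a[0];  /* packed A: two row values per reduction step */
        const float va1 = a[1];
        const float vb3 = w[3];  /* packed B: four channel values per step */
        a += 2;
        w += 4;
        vacc0x3 += va0 * vb3;    /* line 68: multiply-accumulate */
        vacc1x3 += va1 * vb3;
      }
      vacc0x3 = min_f32(vacc0x3, vmax);  /* line 81: clamp above */
      vacc1x3 = min_f32(vacc1x3, vmax);
      vacc0x3 = max_f32(vacc0x3, vmin);  /* line 91: clamp below */
      vacc1x3 = max_f32(vacc1x3, vmin);
      c0[3] = vacc0x3;                   /* line 102: store */
      c1[3] = vacc1x3;
    }
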
/external/XNNPACK/src/f32-vmulcaddc/gen/

  c4-minmax-wasm-2x.c  (all hits in xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x)
      56:  float vacc0x3 = i0[3];                     (local declaration)
      72:  vacc0x3 = vacc0x3 * vscale3 + vbias3;
      81:  vacc0x3 = __builtin_wasm_max_f32(vacc0x3, vmin);
      90:  vacc0x3 = __builtin_wasm_min_f32(vacc0x3, vmax);
      99:  o0[3] = vacc0x3;

  c4-minmax-scalar-2x.c  (all hits in xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x)
      56:  float vacc0x3 = i0[3];                     (local declaration)
      72:  vacc0x3 = vacc0x3 * vscale3 + vbias3;
      81:  vacc0x3 = math_max_f32(vacc0x3, vmin);
      90:  vacc0x3 = math_min_f32(vacc0x3, vmax);
      99:  o0[3] = vacc0x3;
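
The vmulcaddc hits follow a simpler pattern: load an input element, apply a per-channel scale and bias in one expression, clamp, store. The wasm and scalar variants differ only in which min/max primitive they call (__builtin_wasm_max_f32/__builtin_wasm_min_f32 versus math_max_f32/math_min_f32). A one-channel sketch follows; the function and parameter names are illustrative, and the real kernel processes four channels and two rows per iteration.

    /* Sketch of the per-element scale-add-clamp seen in c4-minmax-*-2x.c,
       restricted to row 0, channel 3. */
    void vmulcaddc_lane3_sketch(const float* i0, float* o0,
                                const float* scale, const float* bias,
                                float vmin, float vmax)
    {
      float vacc0x3 = i0[3];                        /* line 56: load input */
      vacc0x3 = vacc0x3 * scale[3] + bias[3];       /* line 72: scale + bias */
      vacc0x3 = vacc0x3 > vmin ? vacc0x3 : vmin;    /* line 81: max(vacc, vmin) */
      vacc0x3 = vacc0x3 < vmax ? vacc0x3 : vmax;    /* line 90: min(vacc, vmax) */
      o0[3] = vacc0x3;                              /* line 99: store */
    }
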
/external/XNNPACK/src/f32-spmm/gen/

  8x4-minmax-scalar.c  (all hits in xnn_f32_spmm_minmax_ukernel_8x4__scalar)
      65:  float vacc0x3 = *w++;                      (local declaration)
      66:  float vacc1x3 = vacc0x3;
      67:  float vacc2x3 = vacc0x3;
      68:  float vacc3x3 = vacc0x3;
      69:  float vacc4x3 = vacc0x3;
      70:  float vacc5x3 = vacc0x3;
      71:  float vacc6x3 = vacc0x3;
      72:  float vacc7x3 = vacc0x3;
     113:  vacc0x3 += vi0 * vw3;
     147:  float vout0x3 = math_min_f32(vacc0x3, vmax);
     [all …]  (remaining hits truncated in the original listing)
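
In the sparse (SPMM) kernel, the single bias value read from the packed weights at line 65 seeds all eight row accumulators of output channel 3; each nonzero weight then contributes an input*weight product to every row before the min/max clamp. The sketch below condenses that broadcast-and-accumulate shape; the sparse input gathering and the packed-weight layout are simplified, so treat the parameter layout and names as hypothetical.

    /* Sketch of the 8-row accumulator pattern from 8x4-minmax-scalar.c, channel 3 only.
       gathered_input is assumed pre-gathered: nnz slices of 8 input values each. */
    #include <stddef.h>

    void spmm_8x4_channel3_sketch(size_t nnz,
                                  const float* w3 /* bias followed by nnz weights */,
                                  const float* gathered_input,
                                  float* out_ch3 /* 8 outputs */,
                                  float vmin, float vmax)
    {
      float vacc[8];
      const float vbias = *w3++;            /* line 65: float vacc0x3 = *w++; */
      for (size_t r = 0; r < 8; r++) {
        vacc[r] = vbias;                    /* lines 66-72: replicate across the 8 rows */
      }
      for (size_t j = 0; j < nnz; j++) {
        const float vw3 = *w3++;            /* one nonzero weight of channel 3 */
        for (size_t r = 0; r < 8; r++) {
          vacc[r] += gathered_input[j * 8 + r] * vw3;   /* line 113: vacc0x3 += vi0 * vw3; */
        }
      }
      for (size_t r = 0; r < 8; r++) {
        float vout = vacc[r] < vmax ? vacc[r] : vmax;   /* line 147: math_min_f32(vacc, vmax) */
        vout = vout > vmin ? vout : vmin;               /* matching lower clamp (not visible in the truncated excerpt) */
        out_ch3[r] = vout;
      }
    }
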
/external/XNNPACK/src/qs8-gemm/gen/

  1x4c8-minmax-wasmsimd-ld64.c  (all hits in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64)
      48:  v128_t vacc0x3 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[3]);   (local declaration)
      74:  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_low_i16x8(vprod0x3));
      75:  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_high_i16x8(vprod0x3));
      82:  …13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x1, vacc0x3, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc0x1, …

  1x4c8-minmax-wasmsimd-ld128.c  (all hits in xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128)
      48:  v128_t vacc0x3 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[3]);   (local declaration)
      76:  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_low_i16x8(vprod0x3));
      79:  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_high_i16x8(vprod0x3));
      86:  …13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x1, vacc0x3, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc0x1, …

  1x4c8-xw-minmax-wasmsimd.c  (all hits in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd)
      48:  v128_t vacc0x3 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[3]);   (local declaration)
      74:  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_low_i16x8(vprod0x3));
      75:  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_high_i16x8(vprod0x3));
      82:  …13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x1, vacc0x3, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc0x1, …

  2x4c8-xw-minmax-wasmsimd.c  (all hits in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd)
      54:  v128_t vacc0x3 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[3]);   (local declaration)
      58:  v128_t vacc1x3 = vacc0x3;
      95:  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_low_i16x8(vprod0x3));
      96:  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_high_i16x8(vprod0x3));
     106:  …13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x1, vacc0x3, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc0x1, …

  2x4c8-minmax-wasmsimd-ld128.c  (all hits in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128)
      54:  v128_t vacc0x3 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[3]);   (local declaration)
      58:  v128_t vacc1x3 = vacc0x3;
      96:  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_low_i16x8(vprod0x3));
     102:  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_high_i16x8(vprod0x3));
     110:  …13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x1, vacc0x3, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc0x1, …

  2x4c8-minmax-wasmsimd-ld64.c  (all hits in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64)
      54:  v128_t vacc0x3 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[3]);   (local declaration)
      58:  v128_t vacc1x3 = vacc0x3;
      95:  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_low_i16x8(vprod0x3));
      96:  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_high_i16x8(vprod0x3));
     106:  …13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x1, vacc0x3, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc0x1, …

  3x4c8-xw-minmax-wasmsimd.c  (all hits in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd)
      60:  v128_t vacc0x3 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[3]);   (local declaration)
      64:  v128_t vacc1x3 = vacc0x3;
      68:  v128_t vacc2x3 = vacc0x3;
     116:  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_low_i16x8(vprod0x3));
     117:  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_high_i16x8(vprod0x3));
     130:  …13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x1, vacc0x3, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc0x1, …

  3x4c8-minmax-wasmsimd-ld128.c  (all hits in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128)
      60:  v128_t vacc0x3 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[3]);   (local declaration)
      64:  v128_t vacc1x3 = vacc0x3;
      68:  v128_t vacc2x3 = vacc0x3;
     116:  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_low_i16x8(vprod0x3));
     125:  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_high_i16x8(vprod0x3));
     134:  …13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x1, vacc0x3, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc0x1, …

  3x4c8-minmax-wasmsimd-ld64.c  (all hits in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64)
      60:  v128_t vacc0x3 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[3]);   (local declaration)
      64:  v128_t vacc1x3 = vacc0x3;
      68:  v128_t vacc2x3 = vacc0x3;
     116:  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_low_i16x8(vprod0x3));
     117:  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_high_i16x8(vprod0x3));
     130:  …13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x1, vacc0x3, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc0x1, …
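
All of the wasmsimd 4c8 GEMM variants above handle vacc0x3 the same way: the 32-bit channel-3 bias is placed in lane 0 of an otherwise zero vector (the wasm_f32x4_replace_lane form is effectively a bit-pattern move, since the value is used as i32x4 afterwards), and each K slice's eight int16 products (vprod0x3, presumably computed just above the quoted lines) are folded in by sign-extending the low and high halves to int32 and adding them. The scalar reference below spells out that widening-accumulate pair of lines lane by lane; it is illustrative only and not the kernel's code.

    /* What
         vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_low_i16x8(vprod0x3));
         vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_high_i16x8(vprod0x3));
       compute per lane: the low half of the int16x8 product vector is added into
       lanes 0..3, then the high half is added onto the same four int32 lanes. */
    #include <stdint.h>

    static void widen_accumulate_i16x8(int32_t vacc[4], const int16_t vprod[8])
    {
      for (int lane = 0; lane < 4; lane++) {
        vacc[lane] += (int32_t) vprod[lane];        /* widen_low_i16x8 + i32x4_add */
      }
      for (int lane = 0; lane < 4; lane++) {
        vacc[lane] += (int32_t) vprod[4 + lane];    /* widen_high_i16x8 + i32x4_add */
      }
    }

Newer wasm_simd128.h headers spell these intrinsics wasm_i32x4_extend_low_i16x8/wasm_i32x4_extend_high_i16x8; the listing reflects the older widen_* names.
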
  1x8c8-minmax-neon-mlal-padal.c  (all hits in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal)
      47:  …int32x4_t vacc0x3 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo…   (local declaration)
      83:  vacc0x3 = vpadalq_s16(vacc0x3, vprod0x3);
     119:  vacc0x3 = vpadalq_s16(vacc0x3, vprod0x3);
     138:  const int32x4_t vsum0x23 = vpaddq_s32(vacc0x2, vacc0x3);
     147:  const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3));
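
The NEON 1x8c8 kernels load the channel-3 bias into lane 0 of a zeroed int32x4 (line 47) and then use vpadalq_s16 to fold each int16 product vector into the accumulator: adjacent int16 lanes are summed in pairs, widened, and added onto the four int32 lanes. The same step appears in the 1x8c8 mull-padal entry further down. A scalar reference for vpadalq_s16, illustrative only:

    /* Scalar model of vacc0x3 = vpadalq_s16(vacc0x3, vprod0x3) (lines 83 and 119):
       pairwise add-and-accumulate-long from 8 x int16 into 4 x int32. */
    #include <stdint.h>

    static void padal_s16_reference(int32_t vacc[4], const int16_t vprod[8])
    {
      for (int lane = 0; lane < 4; lane++) {
        vacc[lane] += (int32_t) vprod[2 * lane] + (int32_t) vprod[2 * lane + 1];
      }
    }

Lines 138 and 147 appear to be the two horizontal-reduction paths the generated file carries: the vpaddq_s32 form pairs the channel-2 and channel-3 accumulators directly (AArch64), while the vget_low/vget_high plus vadd_s32 form performs the same reduction with 64-bit halves on targets without vpaddq_s32.
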
  1x4c8-xw-minmax-sse2.c  (all hits in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse2)
      47:  __m128i vacc0x3 = _mm_cvtsi32_si128((int) ((const int32_t*) w)[3]);   (local declaration)
      67:  vacc0x3 = _mm_add_epi32(vacc0x3, _mm_madd_epi16(vxa0, vxb3));
      74:  … vacc0x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc0x1, vacc0x3), _mm_unpackhi_epi32(vacc0x1, vacc0x…

  1x4c8-minmax-sse2-ld128.c  (all hits in xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld128)
      47:  __m128i vacc0x3 = _mm_cvtsi32_si128((int) ((const int32_t*) w)[3]);   (local declaration)
      69:  vacc0x3 = _mm_add_epi32(vacc0x3, _mm_madd_epi16(vxa0, vxb3));
      76:  … vacc0x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc0x1, vacc0x3), _mm_unpackhi_epi32(vacc0x1, vacc0x…

  1x4c8-minmax-sse2-ld64.c  (all hits in xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld64)
      47:  __m128i vacc0x3 = _mm_cvtsi32_si128((int) ((const int32_t*) w)[3]);   (local declaration)
      71:  vacc0x3 = _mm_add_epi32(vacc0x3, _mm_madd_epi16(vxa0, vxb3));
      78:  … vacc0x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc0x1, vacc0x3), _mm_unpackhi_epi32(vacc0x1, vacc0x…
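
The SSE2 4c8 kernels express the same dot-product slice with _mm_madd_epi16: eight int16 lanes of the sign-extended A slice are multiplied with the corresponding lanes of B's channel-3 column, adjacent products are summed in pairs, and the four resulting int32 partial sums are added into vacc0x3. A small sketch of the init and accumulate steps quoted above; the names vxa0/vxb3 follow the excerpt, while their loading and sign-extension are left out.

    #include <emmintrin.h>
    #include <stdint.h>

    /* line 47: the channel-3 bias lands in lane 0, lanes 1..3 start at zero */
    static __m128i init_acc_channel3(const int32_t* packed_w)
    {
      return _mm_cvtsi32_si128((int) packed_w[3]);
    }

    /* lines 67/69/71: one K slice folded in via int16 multiply + pairwise add to int32 */
    static __m128i accumulate_channel3(__m128i vacc0x3, __m128i vxa0, __m128i vxb3)
    {
      return _mm_add_epi32(vacc0x3, _mm_madd_epi16(vxa0, vxb3));
    }

The truncated vacc0x13 line that closes each entry then interleaves vacc0x1 with vacc0x3 via unpacklo/unpackhi and adds the two interleavings, the SSE2 counterpart of the wasm shuffle-and-add combine.
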
  1x8c8-minmax-neon-mull-padal.c  (all hits in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal)
      47:  …int32x4_t vacc0x3 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo…   (local declaration)
      70:  vacc0x3 = vpadalq_s16(vacc0x3, vprod0x3);
      89:  const int32x4_t vsum0x23 = vpaddq_s32(vacc0x2, vacc0x3);
      98:  const int32x2_t vpsum0x3 = vadd_s32(vget_low_s32(vacc0x3), vget_high_s32(vacc0x3));

/external/XNNPACK/src/qs8-igemm/gen/

  1x4c8-minmax-wasmsimd-ld64.c  (all hits in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64)
      51:  v128_t vacc0x3 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[3]);   (local declaration)
      85:  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_low_i16x8(vprod0x3));
      86:  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_high_i16x8(vprod0x3));
      95:  …13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x1, vacc0x3, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc0x1, …

  1x4c8-minmax-wasmsimd-ld128.c  (all hits in xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128)
      51:  v128_t vacc0x3 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[3]);   (local declaration)
      87:  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_low_i16x8(vprod0x3));
      90:  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_high_i16x8(vprod0x3));
      99:  …13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x1, vacc0x3, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc0x1, …

  2x4c8-minmax-wasmsimd-ld128.c  (all hits in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128)
      55:  v128_t vacc0x3 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[3]);   (local declaration)
      59:  v128_t vacc1x3 = vacc0x3;
     109:  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_low_i16x8(vprod0x3));
     115:  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_high_i16x8(vprod0x3));
     125:  …13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x1, vacc0x3, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc0x1, …

  2x4c8-minmax-wasmsimd-ld64.c  (all hits in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64)
      55:  v128_t vacc0x3 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[3]);   (local declaration)
      59:  v128_t vacc1x3 = vacc0x3;
     108:  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_low_i16x8(vprod0x3));
     109:  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_high_i16x8(vprod0x3));
     121:  …13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x1, vacc0x3, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc0x1, …

  3x4c8-minmax-wasmsimd-ld128.c  (all hits in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128)
      59:  v128_t vacc0x3 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[3]);   (local declaration)
      63:  v128_t vacc1x3 = vacc0x3;
      67:  v128_t vacc2x3 = vacc0x3;
     131:  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_low_i16x8(vprod0x3));
     140:  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_high_i16x8(vprod0x3));
     151:  …13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x1, vacc0x3, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc0x1, …

  3x4c8-minmax-wasmsimd-ld64.c  (all hits in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64)
      59:  v128_t vacc0x3 = wasm_f32x4_replace_lane(vzero, 0, ((const float*) w)[3]);   (local declaration)
      63:  v128_t vacc1x3 = vacc0x3;
      67:  v128_t vacc2x3 = vacc0x3;
     131:  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_low_i16x8(vprod0x3));
     132:  vacc0x3 = wasm_i32x4_add(vacc0x3, wasm_i32x4_widen_high_i16x8(vprod0x3));
     147:  …13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x1, vacc0x3, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc0x1, …
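
The igemm variants differ from the corresponding gemm kernels in how the A pointers are fetched (indirect input), so every vacc0x3 step quoted here matches the gemm listings above, only at different line numbers. The one piece cut off in every wasmsimd excerpt, gemm and igemm alike, is the channel-combine line ('…13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc0x1, vacc0x3, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc0x1, …'). Assuming the elided second shuffle takes the remaining lanes (2, 6, 3, 7), the step interleaves the channel-1 and channel-3 accumulators and adds the two interleavings, so each output lane holds a half-reduced sum, alternating channel 1 and channel 3. A scalar model of that assumption:

    /* Scalar model of the (partly elided) vacc0x13 combine step.
       Assumption: the second shuffle's lane indices are 2, 6, 3, 7. */
    #include <stdint.h>

    static void combine_channels_1_and_3(int32_t out13[4],
                                         const int32_t vacc0x1[4],
                                         const int32_t vacc0x3[4])
    {
      /* wasm_v32x4_shuffle(a, b, 0, 4, 1, 5) -> {a0, b0, a1, b1} */
      const int32_t lo[4] = { vacc0x1[0], vacc0x3[0], vacc0x1[1], vacc0x3[1] };
      /* assumed: wasm_v32x4_shuffle(a, b, 2, 6, 3, 7) -> {a2, b2, a3, b3} */
      const int32_t hi[4] = { vacc0x1[2], vacc0x3[2], vacc0x1[3], vacc0x3[3] };
      for (int lane = 0; lane < 4; lane++) {
        out13[lane] = lo[lane] + hi[lane];   /* wasm_i32x4_add */
      }
    }
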