
Searched refs:vacc1x2 (Results 1 – 25 of 94) sorted by relevance


/external/XNNPACK/src/f32-ppmm/gen/
2x4-minmax-scalar.c
46 float vacc1x2 = vacc0x2; in xnn_f32_ppmm_minmax_ukernel_2x4__scalar() local
67 vacc1x2 += va1 * vb2; in xnn_f32_ppmm_minmax_ukernel_2x4__scalar()
80 vacc1x2 = math_min_f32(vacc1x2, vmax); in xnn_f32_ppmm_minmax_ukernel_2x4__scalar()
90 vacc1x2 = math_max_f32(vacc1x2, vmin); in xnn_f32_ppmm_minmax_ukernel_2x4__scalar()
97 c1[2] = vacc1x2; in xnn_f32_ppmm_minmax_ukernel_2x4__scalar()
117 vacc1x0 = vacc1x2; in xnn_f32_ppmm_minmax_ukernel_2x4__scalar()
3x3-minmax-scalar.c
49 float vacc1x2 = vacc0x2; in xnn_f32_ppmm_minmax_ukernel_3x3__scalar() local
74 vacc1x2 += va1 * vb2; in xnn_f32_ppmm_minmax_ukernel_3x3__scalar()
88 vacc1x2 = math_min_f32(vacc1x2, vmax); in xnn_f32_ppmm_minmax_ukernel_3x3__scalar()
99 vacc1x2 = math_max_f32(vacc1x2, vmin); in xnn_f32_ppmm_minmax_ukernel_3x3__scalar()
108 c1[2] = vacc1x2; in xnn_f32_ppmm_minmax_ukernel_3x3__scalar()
130 vacc1x0 = vacc1x2; in xnn_f32_ppmm_minmax_ukernel_3x3__scalar()
4x4-minmax-scalar.c
54 float vacc1x2 = vacc0x2; in xnn_f32_ppmm_minmax_ukernel_4x4__scalar() local
89 vacc1x2 += va1 * vb2; in xnn_f32_ppmm_minmax_ukernel_4x4__scalar()
110 vacc1x2 = math_min_f32(vacc1x2, vmax); in xnn_f32_ppmm_minmax_ukernel_4x4__scalar()
128 vacc1x2 = math_max_f32(vacc1x2, vmin); in xnn_f32_ppmm_minmax_ukernel_4x4__scalar()
147 c1[2] = vacc1x2; in xnn_f32_ppmm_minmax_ukernel_4x4__scalar()
175 vacc1x0 = vacc1x2; in xnn_f32_ppmm_minmax_ukernel_4x4__scalar()
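
The f32-ppmm hits above all share one scalar microkernel shape: each vacc{M}x{N} holds one output element, is updated with a multiply-add per reduction step (vacc1x2 += va1 * vb2), clamped to [vmin, vmax], and stored through a per-row output pointer (c1[2] = vacc1x2). A minimal C sketch of that pattern; names and the zero initialization are illustrative, not XNNPACK's (the real kernels initialize accumulators from packed weights):

#include <stddef.h>

/* Illustrative 2x4 accumulate-and-clamp loop: `a` supplies 2 floats per
 * step, `b` supplies 4, mirroring the vacc{m}x{n} += va{m} * vb{n} hits. */
void ppmm_2x4_sketch(size_t k, const float* a, const float* b,
                     float* c0, float* c1, float vmin, float vmax) {
  float acc[2][4] = {{0.0f}};
  for (size_t s = 0; s < k; s++) {
    for (int m = 0; m < 2; m++) {
      for (int n = 0; n < 4; n++) {
        acc[m][n] += a[s * 2 + m] * b[s * 4 + n];
      }
    }
  }
  for (int n = 0; n < 4; n++) {
    float v0 = acc[0][n] < vmax ? acc[0][n] : vmax;  /* math_min_f32(., vmax) */
    float v1 = acc[1][n] < vmax ? acc[1][n] : vmax;
    c0[n] = v0 > vmin ? v0 : vmin;                   /* math_max_f32(., vmin) */
    c1[n] = v1 > vmin ? v1 : vmin;                   /* c1[2] = vacc1x2 */
  }
}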
/external/XNNPACK/src/f32-vmulcaddc/gen/
c4-minmax-wasm-2x.c
60 float vacc1x2 = i1[2]; in xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x() local
75 vacc1x2 = vacc1x2 * vscale2 + vbias2; in xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x()
84 vacc1x2 = __builtin_wasm_max_f32(vacc1x2, vmin); in xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x()
93 vacc1x2 = __builtin_wasm_min_f32(vacc1x2, vmax); in xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x()
103 o1[2] = vacc1x2; in xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x()
c4-minmax-scalar-2x.c
60 float vacc1x2 = i1[2]; in xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x() local
75 vacc1x2 = vacc1x2 * vscale2 + vbias2; in xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x()
84 vacc1x2 = math_max_f32(vacc1x2, vmin); in xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x()
93 vacc1x2 = math_min_f32(vacc1x2, vmax); in xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x()
103 o1[2] = vacc1x2; in xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x()
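
The f32-vmulcaddc hits load an input element straight into the accumulator, apply a per-channel scale and bias (vacc1x2 = vacc1x2 * vscale2 + vbias2), clamp with max-then-min as in the matched lines, and store. A hedged scalar sketch with illustrative names:

#include <stddef.h>

void vmulcaddc_2x_sketch(size_t channels, const float* i0, const float* i1,
                         const float* scale, const float* bias,
                         float* o0, float* o1, float vmin, float vmax) {
  for (size_t j = 0; j < channels; j++) {
    float acc0 = i0[j] * scale[j] + bias[j];
    float acc1 = i1[j] * scale[j] + bias[j];  /* vacc1x2 * vscale2 + vbias2 */
    acc0 = acc0 > vmin ? acc0 : vmin;         /* max with vmin first */
    acc1 = acc1 > vmin ? acc1 : vmin;
    o0[j] = acc0 < vmax ? acc0 : vmax;        /* then min with vmax */
    o1[j] = acc1 < vmax ? acc1 : vmax;        /* o1[2] = vacc1x2 */
  }
}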
/external/XNNPACK/src/qs8-gemm/gen/
2x4c8-xw-minmax-wasmsimd.c
57 v128_t vacc1x2 = vacc0x2; in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd() local
90 vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_low_i16x8(vprod1x2)); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd()
91 vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_high_i16x8(vprod1x2)); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd()
107 …02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x0, vacc1x2, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc1x0, in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd()
2x4c8-minmax-wasmsimd-ld128.c
57 v128_t vacc1x2 = vacc0x2; in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128() local
93 vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_low_i16x8(vprod1x2)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
100 vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_high_i16x8(vprod1x2)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
111 …02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x0, vacc1x2, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc1x0, in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
2x4c8-minmax-wasmsimd-ld64.c
57 v128_t vacc1x2 = vacc0x2; in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64() local
90 vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_low_i16x8(vprod1x2)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
91 vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_high_i16x8(vprod1x2)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
107 …02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x0, vacc1x2, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc1x0, in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
3x4c8-xw-minmax-wasmsimd.c
63 v128_t vacc1x2 = vacc0x2; in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd() local
108 vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_low_i16x8(vprod1x2)); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()
109 vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_high_i16x8(vprod1x2)); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()
131 …02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x0, vacc1x2, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc1x0, in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()
3x4c8-minmax-wasmsimd-ld128.c
63 v128_t vacc1x2 = vacc0x2; in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128() local
111 vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_low_i16x8(vprod1x2)); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
120 vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_high_i16x8(vprod1x2)); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
135 …02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x0, vacc1x2, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc1x0, in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
3x4c8-minmax-wasmsimd-ld64.c
63 v128_t vacc1x2 = vacc0x2; in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64() local
108 vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_low_i16x8(vprod1x2)); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
109 vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_high_i16x8(vprod1x2)); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
131 …02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x0, vacc1x2, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc1x0, in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
2x4c8-xw-minmax-sse2.c
56 __m128i vacc1x2 = vacc0x2; in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse2() local
80 vacc1x2 = _mm_add_epi32(vacc1x2, _mm_madd_epi16(vxa1, vxb2)); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse2()
92 … vacc1x02 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x0, vacc1x2), _mm_unpackhi_epi32(vacc1x0, vacc1x… in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse2()
2x4c8-minmax-sse2-ld128.c
56 __m128i vacc1x2 = vacc0x2; in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld128() local
85 vacc1x2 = _mm_add_epi32(vacc1x2, _mm_madd_epi16(vxa1, vxb2)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld128()
94 … vacc1x02 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x0, vacc1x2), _mm_unpackhi_epi32(vacc1x0, vacc1x… in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld128()
2x4c8-minmax-sse2-ld64.c
56 __m128i vacc1x2 = vacc0x2; in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld64() local
83 vacc1x2 = _mm_add_epi32(vacc1x2, _mm_madd_epi16(vxa1, vxb2)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld64()
96 … vacc1x02 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x0, vacc1x2), _mm_unpackhi_epi32(vacc1x0, vacc1x… in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld64()
2x4c8-xw-minmax-xop.c
61 __m128i vacc1x2 = vacc0x2; in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop() local
85 vacc1x2 = _mm_maddd_epi16(vxa1, vxb2, vacc1x2); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop()
98 const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop()
2x8c8-minmax-neon-mull-padal.c
60 int32x4_t vacc1x2 = vacc0x2; in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal() local
88 vacc1x2 = vpadalq_s16(vacc1x2, vprod1x2); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
124 const int32x4_t vsum1x23 = vpaddq_s32(vacc1x2, vacc1x3); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
148 const int32x2_t vpsum1x2 = vadd_s32(vget_low_s32(vacc1x2), vget_high_s32(vacc1x2)); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
2x4c8-minmax-sse41-ld64.c
56 __m128i vacc1x2 = vacc0x2; in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64() local
83 vacc1x2 = _mm_add_epi32(vacc1x2, _mm_madd_epi16(vxa1, vxb2)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64()
97 const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3); in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64()
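
Across the SSE2, SSE4.1, XOP, NEON, and WAsm SIMD variants above, the qs8-gemm hits share one idea: products of signed 8-bit values are widened to 32-bit lanes and added to per-output accumulators (_mm_madd_epi16, vpadalq_s16, or the wasm_i32x4_widen_low/high pairs), which are then folded horizontally (_mm_hadd_epi32, vpaddq_s32, or shuffle-plus-add). A scalar sketch of what one such accumulator computes; all names are illustrative:

#include <stddef.h>
#include <stdint.h>

/* One "c8"-style output element: widen each int8 product to int32 and
 * accumulate. The SIMD kernels keep several of these per register and
 * reduce the lanes at the end. */
int32_t qs8_dot_sketch(const int8_t* a, const int8_t* b, size_t kc) {
  int32_t acc = 0;
  for (size_t i = 0; i < kc; i++) {
    acc += (int32_t) a[i] * (int32_t) b[i];
  }
  return acc;
}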
/external/XNNPACK/src/qs8-igemm/gen/
2x4c8-minmax-wasmsimd-ld128.c
58 v128_t vacc1x2 = vacc0x2; in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128() local
106 vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_low_i16x8(vprod1x2)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
113 vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_high_i16x8(vprod1x2)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
126 …02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x0, vacc1x2, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc1x0, in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
2x4c8-minmax-wasmsimd-ld64.c
58 v128_t vacc1x2 = vacc0x2; in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64() local
103 vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_low_i16x8(vprod1x2)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
104 vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_high_i16x8(vprod1x2)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
122 …02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x0, vacc1x2, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc1x0, in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
3x4c8-minmax-wasmsimd-ld128.c
62 v128_t vacc1x2 = vacc0x2; in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128() local
126 vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_low_i16x8(vprod1x2)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
135 vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_high_i16x8(vprod1x2)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
152 …02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x0, vacc1x2, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc1x0, in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
3x4c8-minmax-wasmsimd-ld64.c
62 v128_t vacc1x2 = vacc0x2; in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64() local
123 vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_low_i16x8(vprod1x2)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
124 vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_high_i16x8(vprod1x2)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
148 …02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x0, vacc1x2, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc1x0, in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
2x4c8-minmax-sse2-ld128.c
57 __m128i vacc1x2 = vacc0x2; in xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld128() local
98 vacc1x2 = _mm_add_epi32(vacc1x2, _mm_madd_epi16(vxa1, vxb2)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld128()
109 … vacc1x02 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x0, vacc1x2), _mm_unpackhi_epi32(vacc1x0, vacc1x… in xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld128()
2x4c8-minmax-sse2-ld64.c
57 __m128i vacc1x2 = vacc0x2; in xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld64() local
96 vacc1x2 = _mm_add_epi32(vacc1x2, _mm_madd_epi16(vxa1, vxb2)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld64()
111 … vacc1x02 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x0, vacc1x2), _mm_unpackhi_epi32(vacc1x0, vacc1x… in xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld64()
2x4c8-minmax-xop-ld64.c
62 __m128i vacc1x2 = vacc0x2; in xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64() local
101 vacc1x2 = _mm_maddd_epi16(vxa1, vxb2, vacc1x2); in xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64()
117 const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3); in xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64()
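
The qs8-igemm hits accumulate exactly like their gemm counterparts; the difference (not visible in these matched lines) is that igemm reads its A rows through an indirection buffer of row pointers rather than from one contiguous block. A hypothetical sketch of that outer structure, with illustrative names:

#include <stddef.h>
#include <stdint.h>

int32_t qs8_igemm_acc_sketch(const int8_t* const* a, size_t ks,
                             const int8_t* w, size_t kc) {
  int32_t acc = 0;
  for (size_t p = 0; p < ks; p++) {
    const int8_t* a_row = a[p];  /* indirect load of the next A row */
    for (size_t i = 0; i < kc; i++) {
      acc += (int32_t) a_row[i] * (int32_t) w[p * kc + i];
    }
  }
  return acc;
}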
/external/XNNPACK/src/f32-prelu/gen/
wasm-2x4.c
74 float vacc1x2 = __builtin_wasm_max_f32(vi1x2, vzero); in xnn_f32_prelu_ukernel__wasm_2x4() local
85 vacc1x2 += vi1x2 * vw2; in xnn_f32_prelu_ukernel__wasm_2x4()
95 o1[2] = vacc1x2; in xnn_f32_prelu_ukernel__wasm_2x4()
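
The f32-prelu hits show only the lines containing vacc1x2: the accumulator starts as the positive part max(x, 0), and the weighted term is then added. In the full kernel the input is first clamped to its negative part on a line that does not match this search, so the sum implements PReLU. A scalar sketch, assuming that reading of the kernel:

#include <math.h>

float prelu_sketch(float x, float w) {
  float acc = fmaxf(x, 0.0f);  /* vacc1x2 = __builtin_wasm_max_f32(vi1x2, vzero) */
  float neg = fminf(x, 0.0f);  /* applied to vi1x2 between the matched lines */
  acc += neg * w;              /* vacc1x2 += vi1x2 * vw2 */
  return acc;                  /* o1[2] = vacc1x2 */
}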
