
Search for refs:vacc2x2 (results 1 – 25 of 172), sorted by relevance.


/external/XNNPACK/src/f32-ppmm/gen/
3x3-minmax-scalar.c (in xnn_f32_ppmm_minmax_ukernel_3x3__scalar):
   52  float vacc2x2 = vacc0x2;   (local declaration)
   75  vacc2x2 += va2 * vb2;
   89  vacc2x2 = math_min_f32(vacc2x2, vmax);
  100  vacc2x2 = math_max_f32(vacc2x2, vmin);
  105  c2[2] = vacc2x2;
  129  vacc2x0 = vacc2x2;

4x4-minmax-scalar.c (in xnn_f32_ppmm_minmax_ukernel_4x4__scalar):
   58  float vacc2x2 = vacc0x2;   (local declaration)
   90  vacc2x2 += va2 * vb2;
  111  vacc2x2 = math_min_f32(vacc2x2, vmax);
  129  vacc2x2 = math_max_f32(vacc2x2, vmin);
  143  c2[2] = vacc2x2;
  174  vacc2x0 = vacc2x2;
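
All of the scalar PPMM hits follow one naming scheme: vaccMxN is the running dot product for output row M, column N of the tile, updated with one multiply-add per k step, clamped against vmin/vmax, and stored to cM[N]; the final hits (vacc2x0 = vacc2x2) appear to come from the remainder path for narrow outputs, where higher-column accumulators are shifted down after a partial store. A minimal standalone sketch of that pattern, assuming a hypothetical 3x3 tile in plain C (not the generated kernel; XNNPACK's math_min_f32/math_max_f32 helpers are replaced here by fminf/fmaxf):

  #include <math.h>
  #include <stddef.h>

  /* One accumulator per (row, column) of the 3x3 output tile; the real
   * kernels spell these out as vacc0x0 ... vacc2x2 and initialize the
   * lower rows by copying row 0 (cf. "float vacc2x2 = vacc0x2").
   * A is pre-packed so each k step supplies one element per output row. */
  static void ppmm_3x3_tile(const float* a, const float* b, float* c[3],
                            size_t kc, float vmin, float vmax) {
    float vacc[3][3] = {{0.0f}};
    for (size_t k = 0; k < kc; k++) {
      for (int m = 0; m < 3; m++) {
        const float va = a[k * 3 + m];
        for (int n = 0; n < 3; n++) {
          vacc[m][n] += va * b[k * 3 + n];   /* cf. vacc2x2 += va2 * vb2 */
        }
      }
    }
    for (int m = 0; m < 3; m++) {
      for (int n = 0; n < 3; n++) {
        float v = fminf(vacc[m][n], vmax);   /* cf. math_min_f32(..., vmax) */
        v = fmaxf(v, vmin);                  /* cf. math_max_f32(..., vmin) */
        c[m][n] = v;                         /* cf. c2[2] = vacc2x2 */
      }
    }
  }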
/external/XNNPACK/src/bf16-gemm/gen/
3x4c8-minmax-neonbf16-bfmlal.c (in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonbf16_bfmlal):
   66  float32x4_t vacc2x2 = vacc0x2;   (local declaration)
   88  vacc2x2 = vbfmlalbq_f32(vacc2x2, va2, vb2);
  101  vacc2x2 = vbfmlaltq_f32(vacc2x2, va2, vb2);
  146  vacc2x2 = vbfmlalbq_f32(vacc2x2, va2x2, vb2);
  147  vacc2x2 = vbfmlaltq_f32(vacc2x2, va2x2, vb2);
  165  const float32x4_t vacc2x23 = vpaddq_f32(vacc2x2, vacc2x3);
  179  const float32x2_t vsum2x2 = vadd_f32(vget_low_f32(vacc2x2), vget_high_f32(vacc2x2));

3x4c8-minmax-neonfma-shland.c (in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_shland):
   67  float32x4_t vacc2x2 = vacc0x2;   (local declaration)
   98  vacc2x2 = vfmaq_f32(vacc2x2, va2e, vb2e);
  120  vacc2x2 = vfmaq_f32(vacc2x2, va2o, vb2o);
  179  vacc2x2 = vfmaq_f32(vacc2x2, va2x2e, vb2e);
  210  vacc2x2 = vfmaq_f32(vacc2x2, va2x2o, vb2o);
  222  const float32x4_t vacc2x23 = vpaddq_f32(vacc2x2, vacc2x3);
  236  const float32x2_t vsum2x2 = vadd_f32(vget_low_f32(vacc2x2), vget_high_f32(vacc2x2));

3x4c8-minmax-neonfma-zip.c (in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonfma_zip):
   67  float32x4_t vacc2x2 = vacc0x2;   (local declaration)
   98  vacc2x2 = vfmaq_f32(vacc2x2, va2e, vb2e);
  120  vacc2x2 = vfmaq_f32(vacc2x2, va2o, vb2o);
  179  vacc2x2 = vfmaq_f32(vacc2x2, va2x2e, vb2e);
  210  vacc2x2 = vfmaq_f32(vacc2x2, va2x2o, vb2o);
  222  const float32x4_t vacc2x23 = vpaddq_f32(vacc2x2, vacc2x3);
  236  const float32x2_t vsum2x2 = vadd_f32(vget_low_f32(vacc2x2), vget_high_f32(vacc2x2));

4x4c8-minmax-neonbf16-bfmlal.c (in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonbf16_bfmlal):
   72  float32x4_t vacc2x2 = vacc0x2;   (local declaration)
  101  vacc2x2 = vbfmlalbq_f32(vacc2x2, va2, vb2);
  118  vacc2x2 = vbfmlaltq_f32(vacc2x2, va2, vb2);
  172  vacc2x2 = vbfmlalbq_f32(vacc2x2, va2x2, vb2);
  173  vacc2x2 = vbfmlaltq_f32(vacc2x2, va2x2, vb2);
  198  const float32x4_t vacc2x23 = vpaddq_f32(vacc2x2, vacc2x3);
  216  const float32x2_t vsum2x2 = vadd_f32(vget_low_f32(vacc2x2), vget_high_f32(vacc2x2));

3x4c8-minmax-neonbf16-bfdot.c (in xnn_bf16_gemm_minmax_ukernel_3x4c8__neonbf16_bfdot):
   66  float32x4_t vacc2x2 = vacc0x2;   (local declaration)
   88  vacc2x2 = vbfdotq_f32(vacc2x2, va2, vb2);
  125  vacc2x2 = vbfdotq_f32(vacc2x2, va2x2, vb2);
  140  const float32x4_t vacc2x23 = vpaddq_f32(vacc2x2, vacc2x3);
  154  const float32x2_t vsum2x2 = vadd_f32(vget_low_f32(vacc2x2), vget_high_f32(vacc2x2));

4x4c8-minmax-neonfma-zip.c (in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_zip):
   73  float32x4_t vacc2x2 = vacc0x2;   (local declaration)
  112  vacc2x2 = vfmaq_f32(vacc2x2, va2e, vb2e);
  139  vacc2x2 = vfmaq_f32(vacc2x2, va2o, vb2o);
  211  vacc2x2 = vfmaq_f32(vacc2x2, va2x2e, vb2e);
  250  vacc2x2 = vfmaq_f32(vacc2x2, va2x2o, vb2o);
  265  const float32x4_t vacc2x23 = vpaddq_f32(vacc2x2, vacc2x3);
  283  const float32x2_t vsum2x2 = vadd_f32(vget_low_f32(vacc2x2), vget_high_f32(vacc2x2));

4x4c8-minmax-neonfma-shland.c (in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonfma_shland):
   73  float32x4_t vacc2x2 = vacc0x2;   (local declaration)
  112  vacc2x2 = vfmaq_f32(vacc2x2, va2e, vb2e);
  139  vacc2x2 = vfmaq_f32(vacc2x2, va2o, vb2o);
  211  vacc2x2 = vfmaq_f32(vacc2x2, va2x2e, vb2e);
  250  vacc2x2 = vfmaq_f32(vacc2x2, va2x2o, vb2o);
  265  const float32x4_t vacc2x23 = vpaddq_f32(vacc2x2, vacc2x3);
  283  const float32x2_t vsum2x2 = vadd_f32(vget_low_f32(vacc2x2), vget_high_f32(vacc2x2));

5x4c8-minmax-neonbf16-bfmlal.c (in xnn_bf16_gemm_minmax_ukernel_5x4c8__neonbf16_bfmlal):
   78  float32x4_t vacc2x2 = vacc0x2;   (local declaration)
  114  vacc2x2 = vbfmlalbq_f32(vacc2x2, va2, vb2);
  135  vacc2x2 = vbfmlaltq_f32(vacc2x2, va2, vb2);
  198  vacc2x2 = vbfmlalbq_f32(vacc2x2, va2x2, vb2);
  199  vacc2x2 = vbfmlaltq_f32(vacc2x2, va2x2, vb2);
  231  const float32x4_t vacc2x23 = vpaddq_f32(vacc2x2, vacc2x3);
  253  const float32x2_t vsum2x2 = vadd_f32(vget_low_f32(vacc2x2), vget_high_f32(vacc2x2));

4x4c8-minmax-neonbf16-bfdot.c (in xnn_bf16_gemm_minmax_ukernel_4x4c8__neonbf16_bfdot):
   72  float32x4_t vacc2x2 = vacc0x2;   (local declaration)
  101  vacc2x2 = vbfdotq_f32(vacc2x2, va2, vb2);
  145  vacc2x2 = vbfdotq_f32(vacc2x2, va2x2, vb2);
  165  const float32x4_t vacc2x23 = vpaddq_f32(vacc2x2, vacc2x3);
  183  const float32x2_t vsum2x2 = vadd_f32(vget_low_f32(vacc2x2), vget_high_f32(vacc2x2));
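
The bf16 GEMM variants above differ only in how they widen bfloat16 inputs: the neonbf16 kernels use the ARMv8.6 BF16 extension (vbfmlalbq_f32/vbfmlaltq_f32 multiply-accumulate the even "bottom" and odd "top" lanes; vbfdotq_f32 is a 2-way dot product), while the neonfma shland/zip kernels emulate the widening with shifts or zips plus vfmaq_f32. In every case each vaccMxN holds four partial sums that are reduced at the end (vpaddq_f32, then vadd_f32 of the low and high halves). A portable sketch of that accumulate-then-reduce shape, assuming bf16 stored as the top 16 bits of an IEEE float (plain-C emulation for illustration, not NEON code):

  #include <stdint.h>
  #include <string.h>

  /* Widen one bf16 value (upper half of an f32) to float. */
  static float bf16_to_f32(uint16_t h) {
    uint32_t bits = (uint32_t)h << 16;
    float f;
    memcpy(&f, &bits, sizeof f);
    return f;
  }

  /* Dot product over kc bf16 pairs with a 4-lane accumulator, mirroring
   * one vaccMxN: even lanes first (cf. vbfmlalbq_f32), then odd lanes
   * (cf. vbfmlaltq_f32), then a horizontal reduction like
   * vadd_f32(vget_low_f32(acc), vget_high_f32(acc)) plus a final add. */
  static float bf16_dot(const uint16_t* a, const uint16_t* b, size_t kc) {
    float acc[4] = {0.0f, 0.0f, 0.0f, 0.0f};
    for (size_t k = 0; k + 8 <= kc; k += 8) {
      for (int i = 0; i < 4; i++)   /* "bottom" lanes 0, 2, 4, 6 */
        acc[i] += bf16_to_f32(a[k + 2*i]) * bf16_to_f32(b[k + 2*i]);
      for (int i = 0; i < 4; i++)   /* "top" lanes 1, 3, 5, 7 */
        acc[i] += bf16_to_f32(a[k + 2*i + 1]) * bf16_to_f32(b[k + 2*i + 1]);
    }
    return (acc[0] + acc[2]) + (acc[1] + acc[3]);
  }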
/external/XNNPACK/src/qc8-gemm/gen/
3x4c8-minmax-fp32-wasmsimd-dot16x2-ld128.c (in xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128):
   66  v128_t vacc2x2 = vacc0x2;   (local declaration)
   97  vacc2x2 = wasm_i32x4_add(vacc2x2, wasm_i32x4_dot_i16x8(vxa2, vxb2));
  108  …02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc2x0, vacc2x2, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc2x0, …

3x4c8-minmax-fp32-wasmsimd-dot16x2-ld64.c (in xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64):
   66  v128_t vacc2x2 = vacc0x2;   (local declaration)
   93  vacc2x2 = wasm_i32x4_add(vacc2x2, wasm_i32x4_dot_i16x8(vxa2, vxb2));
  108  …02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc2x0, vacc2x2, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc2x0, …

3x4c8-minmax-fp32-sse2-ld64.c (in xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64):
   67  __m128i vacc2x2 = vacc0x2;   (local declaration)
  100  vacc2x2 = _mm_add_epi32(vacc2x2, _mm_madd_epi16(vxa2, vxb2));
  116  … vacc2x02 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x0, vacc2x2), _mm_unpackhi_epi32(vacc2x0, vacc2x…

3x4c8-minmax-fp32-sse2-ld128.c (in xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128):
   67  __m128i vacc2x2 = vacc0x2;   (local declaration)
  103  vacc2x2 = _mm_add_epi32(vacc2x2, _mm_madd_epi16(vxa2, vxb2));
  114  … vacc2x02 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x0, vacc2x2), _mm_unpackhi_epi32(vacc2x0, vacc2x…
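
In the quantized kernels the accumulators are 32-bit integer vectors. _mm_madd_epi16 multiplies eight sign-extended 16-bit lanes pairwise and sums adjacent products into four 32-bit lanes; wasm_i32x4_dot_i16x8 is the WebAssembly equivalent, which is why the wasmsimd and sse2 hits are line-for-line analogous. A small self-contained SSE2 example of one accumulation step (hypothetical inputs; requires an x86 target with <emmintrin.h>):

  #include <emmintrin.h>
  #include <stdint.h>
  #include <stdio.h>

  int main(void) {
    /* int8 inputs already sign-extended to int16, as the kernels do */
    const __m128i vxa2 = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
    const __m128i vxb2 = _mm_setr_epi16(1, -1, 1, -1, 1, -1, 1, -1);
    __m128i vacc2x2 = _mm_setzero_si128();
    /* cf. vacc2x2 = _mm_add_epi32(vacc2x2, _mm_madd_epi16(vxa2, vxb2)) */
    vacc2x2 = _mm_add_epi32(vacc2x2, _mm_madd_epi16(vxa2, vxb2));
    int32_t out[4];
    _mm_storeu_si128((__m128i*)out, vacc2x2);
    /* adjacent-pair sums: 1-2, 3-4, 5-6, 7-8 => -1 -1 -1 -1 */
    printf("%d %d %d %d\n", out[0], out[1], out[2], out[3]);
    return 0;
  }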
/external/XNNPACK/src/qs8-gemm/gen/
3x4c8-minmax-fp32-wasmsimd-dot16x2-ld64.c (in xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64):
   66  v128_t vacc2x2 = vacc0x2;   (local declaration)
   93  vacc2x2 = wasm_i32x4_add(vacc2x2, wasm_i32x4_dot_i16x8(vxa2, vxb2));
  108  …02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc2x0, vacc2x2, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc2x0, …

3x4c8-minmax-fp32-wasmsimd-dot16x2-ld128.c (in xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128):
   66  v128_t vacc2x2 = vacc0x2;   (local declaration)
   97  vacc2x2 = wasm_i32x4_add(vacc2x2, wasm_i32x4_dot_i16x8(vxa2, vxb2));
  108  …02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc2x0, vacc2x2, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc2x0, …

3x4c8-xw-minmax-fp32-wasmsimd-dot16x2.c (in xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2):
   66  v128_t vacc2x2 = vacc0x2;   (local declaration)
   93  vacc2x2 = wasm_i32x4_add(vacc2x2, wasm_i32x4_dot_i16x8(vxa2, vxb2));
  108  …02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc2x0, vacc2x2, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc2x0, …

3x4c8-minmax-fp32-sse2-ld128.c (in xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128):
   67  __m128i vacc2x2 = vacc0x2;   (local declaration)
  103  vacc2x2 = _mm_add_epi32(vacc2x2, _mm_madd_epi16(vxa2, vxb2));
  114  … vacc2x02 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x0, vacc2x2), _mm_unpackhi_epi32(vacc2x0, vacc2x…
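
The truncated wasmsimd hits ("…02 = wasm_i32x4_add(wasm_v32x4_shuffle(…, 0, 4, 1, 5), …") are the start of the row reduction: the four per-column accumulators vacc2x0 through vacc2x3 are interleaved and added pairwise until a single v128 holds the four column sums for row 2. A sketch of that reduction with <wasm_simd128.h>; note the elided halves of the original expressions are reconstructed by analogy here, so treat the exact shuffle sequence as an assumption (build with a wasm toolchain, e.g. clang -msimd128):

  #include <wasm_simd128.h>

  /* Reduce four 4-lane accumulators (one per output column of row 2)
   * to a single v128 of column sums: interleave low/high lane pairs
   * and add, in two rounds. Mirrors the visible
   * wasm_v32x4_shuffle(vacc2x0, vacc2x2, 0, 4, 1, 5) pattern. */
  static v128_t reduce_row2(v128_t vacc2x0, v128_t vacc2x1,
                            v128_t vacc2x2, v128_t vacc2x3) {
    const v128_t vacc2x02 = wasm_i32x4_add(
        wasm_v32x4_shuffle(vacc2x0, vacc2x2, 0, 4, 1, 5),
        wasm_v32x4_shuffle(vacc2x0, vacc2x2, 2, 6, 3, 7));
    const v128_t vacc2x13 = wasm_i32x4_add(
        wasm_v32x4_shuffle(vacc2x1, vacc2x3, 0, 4, 1, 5),
        wasm_v32x4_shuffle(vacc2x1, vacc2x3, 2, 6, 3, 7));
    return wasm_i32x4_add(
        wasm_v32x4_shuffle(vacc2x02, vacc2x13, 0, 4, 1, 5),
        wasm_v32x4_shuffle(vacc2x02, vacc2x13, 2, 6, 3, 7));
  }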
/external/XNNPACK/src/qu8-gemm/gen/
3x4c8-minmax-fp32-wasmsimd-dot16x2-ld64.c (in xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64):
   67  v128_t vacc2x2 = vacc0x2;   (local declaration)
   94  vacc2x2 = wasm_i32x4_add(vacc2x2, wasm_i32x4_dot_i16x8(vxa2, vxb2));
  109  …02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc2x0, vacc2x2, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc2x0, …
/external/XNNPACK/src/qc8-igemm/gen/
3x4c8-minmax-fp32-wasmsimd-dot16x2-ld128.c (in xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128):
   65  v128_t vacc2x2 = vacc0x2;   (local declaration)
  112  vacc2x2 = wasm_i32x4_add(vacc2x2, wasm_i32x4_dot_i16x8(vxa2, vxb2));
  125  …02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc2x0, vacc2x2, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc2x0, …

3x4c8-minmax-fp32-sse2-ld64.c (in xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64):
   66  __m128i vacc2x2 = vacc0x2;   (local declaration)
  115  vacc2x2 = _mm_add_epi32(vacc2x2, _mm_madd_epi16(vxa2, vxb2));
  133  … vacc2x02 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x0, vacc2x2), _mm_unpackhi_epi32(vacc2x0, vacc2x…
/external/XNNPACK/src/qs8-igemm/gen/
3x4c8-minmax-fp32-sse2-ld128.c (in xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128):
   66  __m128i vacc2x2 = vacc0x2;   (local declaration)
  118  vacc2x2 = _mm_add_epi32(vacc2x2, _mm_madd_epi16(vxa2, vxb2));
  131  … vacc2x02 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x0, vacc2x2), _mm_unpackhi_epi32(vacc2x0, vacc2x…

3x4c8-minmax-fp32-wasmsimd-dot16x2-ld128.c (in xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128):
   65  v128_t vacc2x2 = vacc0x2;   (local declaration)
  112  vacc2x2 = wasm_i32x4_add(vacc2x2, wasm_i32x4_dot_i16x8(vxa2, vxb2));
  125  …02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc2x0, vacc2x2, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc2x0, …
/external/XNNPACK/src/qu8-igemm/gen/
3x4c8-minmax-fp32-sse2-ld64.c (in xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64):
   66  __m128i vacc2x2 = vacc0x2;   (local declaration)
  117  vacc2x2 = _mm_add_epi32(vacc2x2, _mm_madd_epi16(vxa2, vxb2));
  135  … vacc2x02 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x0, vacc2x2), _mm_unpackhi_epi32(vacc2x0, vacc2x…
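
The *-igemm hits repeat the gemm arithmetic exactly; the difference between the two families is input addressing. XNNPACK's IGEMM kernels read the left-hand operand through an indirection buffer of row pointers, so convolutions can be computed without materializing an im2col matrix. A plain-C sketch of that addressing scheme, with hypothetical names (the real kernel signatures differ):

  #include <stddef.h>
  #include <stdint.h>

  /* One output element via indirect addressing: a_ptrs holds ks row
   * pointers (one per convolution tap), each pointing at kc int8 values.
   * The inner multiply-accumulate matches the gemm kernels above. */
  static int32_t igemm_dot(const int8_t* const* a_ptrs, size_t ks,
                           const int8_t* w, size_t kc) {
    int32_t vacc = 0;
    for (size_t p = 0; p < ks; p++) {
      const int8_t* a = a_ptrs[p];
      for (size_t k = 0; k < kc; k++) {
        vacc += (int32_t)a[k] * (int32_t)w[p * kc + k];
      }
    }
    return vacc;
  }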
