Home
last modified time | relevance | path

Searched refs:vsum0123 (Results 1 – 15 of 15) sorted by relevance

/external/pytorch/aten/src/ATen/native/quantized/cpu/qnnpack/src/q8gemm/
D4x-sumrows-neon.c140 int32x4_t vsum0123 = vmulq_n_s32(vreinterpretq_s32_u32(vacc0123), multiplier); in pytorch_q8sumrows_ukernel_4x__neon() local
142 vst1q_s32(a_sum, vsum0123); in pytorch_q8sumrows_ukernel_4x__neon()
145 vst1_s32(a_sum, vget_low_s32(vsum0123)); in pytorch_q8sumrows_ukernel_4x__neon()
147 vsum0123 = vextq_s32(vsum0123, vsum0123, 2); in pytorch_q8sumrows_ukernel_4x__neon()
151 vst1q_lane_s32(a_sum, vsum0123, 0); in pytorch_q8sumrows_ukernel_4x__neon()
/external/XNNPACK/src/f32-gavgpool/
D7p7x-minmax-wasmsimd-arm-c4.c99 const v128_t vsum0123 = wasm_f32x4_add(vsum01, vsum23); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_arm_c4() local
102 const v128_t vsum = wasm_f32x4_add(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_arm_c4()
161 const v128_t vsum0123 = wasm_f32x4_add(vsum01, vsum23); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_arm_c4() local
164 const v128_t vsum = wasm_f32x4_add(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_arm_c4()
190 const v128_t vsum0123 = wasm_f32x4_add(vsum01, vsum23); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_arm_c4() local
193 const v128_t vsum = wasm_f32x4_add(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_arm_c4()
D7p7x-minmax-wasmsimd-x86-c4.c99 const v128_t vsum0123 = wasm_f32x4_add(vsum01, vsum23); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_x86_c4() local
102 const v128_t vsum = wasm_f32x4_add(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_x86_c4()
161 const v128_t vsum0123 = wasm_f32x4_add(vsum01, vsum23); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_x86_c4() local
164 const v128_t vsum = wasm_f32x4_add(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_x86_c4()
190 const v128_t vsum0123 = wasm_f32x4_add(vsum01, vsum23); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_x86_c4() local
193 const v128_t vsum = wasm_f32x4_add(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_x86_c4()
D7p7x-minmax-sse-c4.c98 const __m128 vsum0123 = _mm_add_ps(vsum01, vsum23); in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4() local
101 const __m128 vsum = _mm_add_ps(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4()
160 const __m128 vsum0123 = _mm_add_ps(vsum01, vsum23); in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4() local
163 const __m128 vsum = _mm_add_ps(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4()
189 const __m128 vsum0123 = _mm_add_ps(vsum01, vsum23); in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4() local
192 const __m128 vsum = _mm_add_ps(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4()
D7p7x-minmax-neon-c4.c84 const float32x4_t vsum0123 = vaddq_f32(vsum01, vsum23); in xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4() local
87 const float32x4_t vsum = vaddq_f32(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4()
138 const float32x4_t vsum0123 = vaddq_f32(vsum01, vsum23); in xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4() local
141 const float32x4_t vsum = vaddq_f32(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4()
166 const float32x4_t vsum0123 = vaddq_f32(vsum01, vsum23); in xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4() local
169 const float32x4_t vsum = vaddq_f32(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4()
D7p7x-minmax-scalar-c1.c83 const float vsum0123 = vsum01 + vsum23; in xnn_f32_gavgpool_minmax_ukernel_7p7x__scalar_c1() local
86 const float vsum = vsum0123 + vsum456a; in xnn_f32_gavgpool_minmax_ukernel_7p7x__scalar_c1()
137 const float vsum0123 = vsum01 + vsum23; in xnn_f32_gavgpool_minmax_ukernel_7p7x__scalar_c1() local
140 const float vsum = vsum0123 + vsum456a; in xnn_f32_gavgpool_minmax_ukernel_7p7x__scalar_c1()
D7p7x-minmax-wasm-c1.c83 const float vsum0123 = vsum01 + vsum23; in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasm_c1() local
86 const float vsum = vsum0123 + vsum456a; in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasm_c1()
137 const float vsum0123 = vsum01 + vsum23; in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasm_c1() local
140 const float vsum = vsum0123 + vsum456a; in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasm_c1()
/external/pytorch/aten/src/ATen/native/quantized/cpu/qnnpack/src/q8avgpool/
Dmp8x9p8q-neon.c192 const uint16x8_t vsum0123 = vaddq_u16(vsum01, vsum23); in pytorch_q8avgpool_ukernel_mp8x9p8q__neon() local
194 const uint16x8_t vsum = vaddq_u16(vsum0123, vsum4567); in pytorch_q8avgpool_ukernel_mp8x9p8q__neon()
242 const uint16x8_t vsum0123 = vaddq_u16(vsum01, vsum23); in pytorch_q8avgpool_ukernel_mp8x9p8q__neon() local
244 const uint16x8_t vsum = vaddq_u16(vsum0123, vsum4567); in pytorch_q8avgpool_ukernel_mp8x9p8q__neon()
316 const int16x8_t vsum0123 = vaddq_s16(vsum01, vsum23); in pytorch_q8avgpool_ukernel_mp8x9p8q__neon() local
318 const int16x8_t vsum = vaddq_s16(vsum0123, vsum4567); in pytorch_q8avgpool_ukernel_mp8x9p8q__neon()
392 const int16x8_t vsum0123 = vaddq_s16(vsum01, vsum23); in pytorch_q8avgpool_ukernel_mp8x9p8q__neon() local
394 const int16x8_t vsum = vaddq_s16(vsum0123, vsum4567); in pytorch_q8avgpool_ukernel_mp8x9p8q__neon()
Dmp8x9p8q-sse2.c208 const __m128i vsum0123 = _mm_add_epi16(vsum01, vsum23); in pytorch_q8avgpool_ukernel_mp8x9p8q__sse2() local
210 const __m128i vsum = _mm_add_epi16(vsum0123, vsum4567); in pytorch_q8avgpool_ukernel_mp8x9p8q__sse2()
266 const __m128i vsum0123 = _mm_add_epi16(vsum01, vsum23); in pytorch_q8avgpool_ukernel_mp8x9p8q__sse2() local
268 const __m128i vsum = _mm_add_epi16(vsum0123, vsum4567); in pytorch_q8avgpool_ukernel_mp8x9p8q__sse2()
347 const __m128i vsum0123 = _mm_add_epi16(vsum01, vsum23); in pytorch_q8avgpool_ukernel_mp8x9p8q__sse2() local
349 const __m128i vsum = _mm_add_epi16(vsum0123, vsum4567); in pytorch_q8avgpool_ukernel_mp8x9p8q__sse2()
425 const __m128i vsum0123 = _mm_add_epi16(vsum01, vsum23); in pytorch_q8avgpool_ukernel_mp8x9p8q__sse2() local
427 const __m128i vsum = _mm_add_epi16(vsum0123, vsum4567); in pytorch_q8avgpool_ukernel_mp8x9p8q__sse2()
/external/XNNPACK/src/f16-gavgpool-cw/
Dneonfp16arith-x8.c78 const float16x8_t vsum0123 = vpaddq_f16(vsum01, vsum23); in xnn_f16_gavgpool_cw_ukernel__neonfp16arith_x8() local
79 const float16x4_t vsum = vpadd_f16(vget_low_f16(vsum0123), vget_high_f16(vsum0123)); in xnn_f16_gavgpool_cw_ukernel__neonfp16arith_x8()
/external/XNNPACK/src/qu8-avgpool/
D9p8x-minmax-scalar-c1.c176 const uint32_t vsum0123 = vsum01 + vsum23; in xnn_qu8_avgpool_minmax_ukernel_9p8x__scalar_c1() local
178 vacc += (int32_t) vsum0123; in xnn_qu8_avgpool_minmax_ukernel_9p8x__scalar_c1()
268 const uint32_t vsum0123 = vsum01 + vsum23; in xnn_qu8_avgpool_minmax_ukernel_9p8x__scalar_c1() local
270 vacc += (int32_t) vsum0123; in xnn_qu8_avgpool_minmax_ukernel_9p8x__scalar_c1()
D9p8x-minmax-neon-c8.c183 const uint16x8_t vsum0123 = vaddq_u16(vsum01, vsum23); in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8() local
185 const uint16x8_t vsum = vaddq_u16(vsum0123, vsum4567); in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8()
278 const int16x8_t vsum0123 = vaddq_s16(vsum01, vsum23); in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8() local
280 const int16x8_t vsum = vaddq_s16(vsum0123, vsum4567); in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8()
352 const int16x8_t vsum0123 = vaddq_s16(vsum01, vsum23); in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8() local
354 const int16x8_t vsum = vaddq_s16(vsum0123, vsum4567); in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8()
D9p8x-minmax-sse2-c8.c198 const __m128i vsum0123 = _mm_add_epi16(vsum01, vsum23); in xnn_qu8_avgpool_minmax_ukernel_9p8x__sse2_c8() local
200 const __m128i vsum = _mm_add_epi16(vsum0123, vsum4567); in xnn_qu8_avgpool_minmax_ukernel_9p8x__sse2_c8()
304 const __m128i vsum0123 = _mm_add_epi16(vsum01, vsum23); in xnn_qu8_avgpool_minmax_ukernel_9p8x__sse2_c8() local
306 const __m128i vsum = _mm_add_epi16(vsum0123, vsum4567); in xnn_qu8_avgpool_minmax_ukernel_9p8x__sse2_c8()
379 const __m128i vsum0123 = _mm_add_epi16(vsum01, vsum23); in xnn_qu8_avgpool_minmax_ukernel_9p8x__sse2_c8() local
381 const __m128i vsum = _mm_add_epi16(vsum0123, vsum4567); in xnn_qu8_avgpool_minmax_ukernel_9p8x__sse2_c8()
/external/XNNPACK/src/amalgam/
Dsse.c4360 const __m128 vsum0123 = _mm_add_ps(vsum01, vsum23); in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4() local
4363 const __m128 vsum = _mm_add_ps(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4()
4422 const __m128 vsum0123 = _mm_add_ps(vsum01, vsum23); in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4() local
4425 const __m128 vsum = _mm_add_ps(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4()
4451 const __m128 vsum0123 = _mm_add_ps(vsum01, vsum23); in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4() local
4454 const __m128 vsum = _mm_add_ps(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4()
Dsse2.c7974 const __m128i vsum0123 = _mm_add_epi16(vsum01, vsum23); in xnn_qu8_avgpool_minmax_ukernel_9p8x__sse2_c8() local
7976 const __m128i vsum = _mm_add_epi16(vsum0123, vsum4567); in xnn_qu8_avgpool_minmax_ukernel_9p8x__sse2_c8()
8080 const __m128i vsum0123 = _mm_add_epi16(vsum01, vsum23); in xnn_qu8_avgpool_minmax_ukernel_9p8x__sse2_c8() local
8082 const __m128i vsum = _mm_add_epi16(vsum0123, vsum4567); in xnn_qu8_avgpool_minmax_ukernel_9p8x__sse2_c8()
8155 const __m128i vsum0123 = _mm_add_epi16(vsum01, vsum23); in xnn_qu8_avgpool_minmax_ukernel_9p8x__sse2_c8() local
8157 const __m128i vsum = _mm_add_epi16(vsum0123, vsum4567); in xnn_qu8_avgpool_minmax_ukernel_9p8x__sse2_c8()