/external/pytorch/aten/src/ATen/native/quantized/cpu/qnnpack/src/q8gemm/ |
D | 4x-sumrows-neon.c | 140 int32x4_t vsum0123 = vmulq_n_s32(vreinterpretq_s32_u32(vacc0123), multiplier); in pytorch_q8sumrows_ukernel_4x__neon() local 142 vst1q_s32(a_sum, vsum0123); in pytorch_q8sumrows_ukernel_4x__neon() 145 vst1_s32(a_sum, vget_low_s32(vsum0123)); in pytorch_q8sumrows_ukernel_4x__neon() 147 vsum0123 = vextq_s32(vsum0123, vsum0123, 2); in pytorch_q8sumrows_ukernel_4x__neon() 151 vst1q_lane_s32(a_sum, vsum0123, 0); in pytorch_q8sumrows_ukernel_4x__neon()
|
/external/XNNPACK/src/f32-gavgpool/ |
D | 7p7x-minmax-wasmsimd-arm-c4.c | 99 const v128_t vsum0123 = wasm_f32x4_add(vsum01, vsum23); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_arm_c4() local 102 const v128_t vsum = wasm_f32x4_add(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_arm_c4() 161 const v128_t vsum0123 = wasm_f32x4_add(vsum01, vsum23); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_arm_c4() local 164 const v128_t vsum = wasm_f32x4_add(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_arm_c4() 190 const v128_t vsum0123 = wasm_f32x4_add(vsum01, vsum23); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_arm_c4() local 193 const v128_t vsum = wasm_f32x4_add(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_arm_c4()
|
D | 7p7x-minmax-wasmsimd-x86-c4.c | 99 const v128_t vsum0123 = wasm_f32x4_add(vsum01, vsum23); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_x86_c4() local 102 const v128_t vsum = wasm_f32x4_add(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_x86_c4() 161 const v128_t vsum0123 = wasm_f32x4_add(vsum01, vsum23); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_x86_c4() local 164 const v128_t vsum = wasm_f32x4_add(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_x86_c4() 190 const v128_t vsum0123 = wasm_f32x4_add(vsum01, vsum23); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_x86_c4() local 193 const v128_t vsum = wasm_f32x4_add(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_x86_c4()
|
D | 7p7x-minmax-sse-c4.c | 98 const __m128 vsum0123 = _mm_add_ps(vsum01, vsum23); in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4() local 101 const __m128 vsum = _mm_add_ps(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4() 160 const __m128 vsum0123 = _mm_add_ps(vsum01, vsum23); in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4() local 163 const __m128 vsum = _mm_add_ps(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4() 189 const __m128 vsum0123 = _mm_add_ps(vsum01, vsum23); in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4() local 192 const __m128 vsum = _mm_add_ps(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4()
|
D | 7p7x-minmax-neon-c4.c | 84 const float32x4_t vsum0123 = vaddq_f32(vsum01, vsum23); in xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4() local 87 const float32x4_t vsum = vaddq_f32(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4() 138 const float32x4_t vsum0123 = vaddq_f32(vsum01, vsum23); in xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4() local 141 const float32x4_t vsum = vaddq_f32(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4() 166 const float32x4_t vsum0123 = vaddq_f32(vsum01, vsum23); in xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4() local 169 const float32x4_t vsum = vaddq_f32(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4()
|
D | 7p7x-minmax-scalar-c1.c | 83 const float vsum0123 = vsum01 + vsum23; in xnn_f32_gavgpool_minmax_ukernel_7p7x__scalar_c1() local 86 const float vsum = vsum0123 + vsum456a; in xnn_f32_gavgpool_minmax_ukernel_7p7x__scalar_c1() 137 const float vsum0123 = vsum01 + vsum23; in xnn_f32_gavgpool_minmax_ukernel_7p7x__scalar_c1() local 140 const float vsum = vsum0123 + vsum456a; in xnn_f32_gavgpool_minmax_ukernel_7p7x__scalar_c1()
|
D | 7p7x-minmax-wasm-c1.c | 83 const float vsum0123 = vsum01 + vsum23; in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasm_c1() local 86 const float vsum = vsum0123 + vsum456a; in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasm_c1() 137 const float vsum0123 = vsum01 + vsum23; in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasm_c1() local 140 const float vsum = vsum0123 + vsum456a; in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasm_c1()
|
/external/pytorch/aten/src/ATen/native/quantized/cpu/qnnpack/src/q8avgpool/ |
D | mp8x9p8q-neon.c | 192 const uint16x8_t vsum0123 = vaddq_u16(vsum01, vsum23); in pytorch_q8avgpool_ukernel_mp8x9p8q__neon() local 194 const uint16x8_t vsum = vaddq_u16(vsum0123, vsum4567); in pytorch_q8avgpool_ukernel_mp8x9p8q__neon() 242 const uint16x8_t vsum0123 = vaddq_u16(vsum01, vsum23); in pytorch_q8avgpool_ukernel_mp8x9p8q__neon() local 244 const uint16x8_t vsum = vaddq_u16(vsum0123, vsum4567); in pytorch_q8avgpool_ukernel_mp8x9p8q__neon() 316 const int16x8_t vsum0123 = vaddq_s16(vsum01, vsum23); in pytorch_q8avgpool_ukernel_mp8x9p8q__neon() local 318 const int16x8_t vsum = vaddq_s16(vsum0123, vsum4567); in pytorch_q8avgpool_ukernel_mp8x9p8q__neon() 392 const int16x8_t vsum0123 = vaddq_s16(vsum01, vsum23); in pytorch_q8avgpool_ukernel_mp8x9p8q__neon() local 394 const int16x8_t vsum = vaddq_s16(vsum0123, vsum4567); in pytorch_q8avgpool_ukernel_mp8x9p8q__neon()
|
D | mp8x9p8q-sse2.c | 208 const __m128i vsum0123 = _mm_add_epi16(vsum01, vsum23); in pytorch_q8avgpool_ukernel_mp8x9p8q__sse2() local 210 const __m128i vsum = _mm_add_epi16(vsum0123, vsum4567); in pytorch_q8avgpool_ukernel_mp8x9p8q__sse2() 266 const __m128i vsum0123 = _mm_add_epi16(vsum01, vsum23); in pytorch_q8avgpool_ukernel_mp8x9p8q__sse2() local 268 const __m128i vsum = _mm_add_epi16(vsum0123, vsum4567); in pytorch_q8avgpool_ukernel_mp8x9p8q__sse2() 347 const __m128i vsum0123 = _mm_add_epi16(vsum01, vsum23); in pytorch_q8avgpool_ukernel_mp8x9p8q__sse2() local 349 const __m128i vsum = _mm_add_epi16(vsum0123, vsum4567); in pytorch_q8avgpool_ukernel_mp8x9p8q__sse2() 425 const __m128i vsum0123 = _mm_add_epi16(vsum01, vsum23); in pytorch_q8avgpool_ukernel_mp8x9p8q__sse2() local 427 const __m128i vsum = _mm_add_epi16(vsum0123, vsum4567); in pytorch_q8avgpool_ukernel_mp8x9p8q__sse2()
|
/external/XNNPACK/src/f16-gavgpool-cw/ |
D | neonfp16arith-x8.c | 78 const float16x8_t vsum0123 = vpaddq_f16(vsum01, vsum23); in xnn_f16_gavgpool_cw_ukernel__neonfp16arith_x8() local 79 const float16x4_t vsum = vpadd_f16(vget_low_f16(vsum0123), vget_high_f16(vsum0123)); in xnn_f16_gavgpool_cw_ukernel__neonfp16arith_x8()
|
/external/XNNPACK/src/qu8-avgpool/ |
D | 9p8x-minmax-scalar-c1.c | 176 const uint32_t vsum0123 = vsum01 + vsum23; in xnn_qu8_avgpool_minmax_ukernel_9p8x__scalar_c1() local 178 vacc += (int32_t) vsum0123; in xnn_qu8_avgpool_minmax_ukernel_9p8x__scalar_c1() 268 const uint32_t vsum0123 = vsum01 + vsum23; in xnn_qu8_avgpool_minmax_ukernel_9p8x__scalar_c1() local 270 vacc += (int32_t) vsum0123; in xnn_qu8_avgpool_minmax_ukernel_9p8x__scalar_c1()
|
D | 9p8x-minmax-neon-c8.c | 183 const uint16x8_t vsum0123 = vaddq_u16(vsum01, vsum23); in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8() local 185 const uint16x8_t vsum = vaddq_u16(vsum0123, vsum4567); in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8() 278 const int16x8_t vsum0123 = vaddq_s16(vsum01, vsum23); in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8() local 280 const int16x8_t vsum = vaddq_s16(vsum0123, vsum4567); in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8() 352 const int16x8_t vsum0123 = vaddq_s16(vsum01, vsum23); in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8() local 354 const int16x8_t vsum = vaddq_s16(vsum0123, vsum4567); in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8()
|
D | 9p8x-minmax-sse2-c8.c | 198 const __m128i vsum0123 = _mm_add_epi16(vsum01, vsum23); in xnn_qu8_avgpool_minmax_ukernel_9p8x__sse2_c8() local 200 const __m128i vsum = _mm_add_epi16(vsum0123, vsum4567); in xnn_qu8_avgpool_minmax_ukernel_9p8x__sse2_c8() 304 const __m128i vsum0123 = _mm_add_epi16(vsum01, vsum23); in xnn_qu8_avgpool_minmax_ukernel_9p8x__sse2_c8() local 306 const __m128i vsum = _mm_add_epi16(vsum0123, vsum4567); in xnn_qu8_avgpool_minmax_ukernel_9p8x__sse2_c8() 379 const __m128i vsum0123 = _mm_add_epi16(vsum01, vsum23); in xnn_qu8_avgpool_minmax_ukernel_9p8x__sse2_c8() local 381 const __m128i vsum = _mm_add_epi16(vsum0123, vsum4567); in xnn_qu8_avgpool_minmax_ukernel_9p8x__sse2_c8()
|
/external/XNNPACK/src/amalgam/ |
D | sse.c | 4360 const __m128 vsum0123 = _mm_add_ps(vsum01, vsum23); in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4() local 4363 const __m128 vsum = _mm_add_ps(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4() 4422 const __m128 vsum0123 = _mm_add_ps(vsum01, vsum23); in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4() local 4425 const __m128 vsum = _mm_add_ps(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4() 4451 const __m128 vsum0123 = _mm_add_ps(vsum01, vsum23); in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4() local 4454 const __m128 vsum = _mm_add_ps(vsum0123, vsum456a); in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4()
|
D | sse2.c | 7974 const __m128i vsum0123 = _mm_add_epi16(vsum01, vsum23); in xnn_qu8_avgpool_minmax_ukernel_9p8x__sse2_c8() local 7976 const __m128i vsum = _mm_add_epi16(vsum0123, vsum4567); in xnn_qu8_avgpool_minmax_ukernel_9p8x__sse2_c8() 8080 const __m128i vsum0123 = _mm_add_epi16(vsum01, vsum23); in xnn_qu8_avgpool_minmax_ukernel_9p8x__sse2_c8() local 8082 const __m128i vsum = _mm_add_epi16(vsum0123, vsum4567); in xnn_qu8_avgpool_minmax_ukernel_9p8x__sse2_c8() 8155 const __m128i vsum0123 = _mm_add_epi16(vsum01, vsum23); in xnn_qu8_avgpool_minmax_ukernel_9p8x__sse2_c8() local 8157 const __m128i vsum = _mm_add_epi16(vsum0123, vsum4567); in xnn_qu8_avgpool_minmax_ukernel_9p8x__sse2_c8()
|