/external/XNNPACK/src/f32-pavgpool/

  9p8x-minmax-wasmsimd-x86-c4.c  (in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasmsimd_x86_c4)
    184  const v128_t vsum01a = wasm_f32x4_add(vsum01, vacc);  (local)
    186  const v128_t vsum0167a = wasm_f32x4_add(vsum01a, vsum67);
    287  const v128_t vsum01a = wasm_f32x4_add(vsum01, vacc);  (local)
    289  const v128_t vsum0167a = wasm_f32x4_add(vsum01a, vsum67);
    316  const v128_t vsum01a = wasm_f32x4_add(vsum01, vacc);  (local)
    318  const v128_t vsum0167a = wasm_f32x4_add(vsum01a, vsum67);

  9p8x-minmax-wasmsimd-arm-c4.c  (in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasmsimd_arm_c4)
    184  const v128_t vsum01a = wasm_f32x4_add(vsum01, vacc);  (local)
    186  const v128_t vsum0167a = wasm_f32x4_add(vsum01a, vsum67);
    287  const v128_t vsum01a = wasm_f32x4_add(vsum01, vacc);  (local)
    289  const v128_t vsum0167a = wasm_f32x4_add(vsum01a, vsum67);
    316  const v128_t vsum01a = wasm_f32x4_add(vsum01, vacc);  (local)
    318  const v128_t vsum0167a = wasm_f32x4_add(vsum01a, vsum67);

  9p8x-minmax-neon-c4.c  (in xnn_f32_pavgpool_minmax_ukernel_9p8x__neon_c4)
    166  const float32x4_t vsum01a = vaddq_f32(vsum01, vacc);  (local)
    168  const float32x4_t vsum0167a = vaddq_f32(vsum01a, vsum67);
    258  const float32x4_t vsum01a = vaddq_f32(vsum01, vacc);  (local)
    260  const float32x4_t vsum0167a = vaddq_f32(vsum01a, vsum67);
    286  const float32x4_t vsum01a = vaddq_f32(vsum01, vacc);  (local)
    288  const float32x4_t vsum0167a = vaddq_f32(vsum01a, vsum67);

  9p8x-minmax-sse-c4.c  (in xnn_f32_pavgpool_minmax_ukernel_9p8x__sse_c4)
    183  const __m128 vsum01a = _mm_add_ps(vsum01, vacc);  (local)
    185  const __m128 vsum0167a = _mm_add_ps(vsum01a, vsum67);
    285  const __m128 vsum01a = _mm_add_ps(vsum01, vacc);  (local)
    287  const __m128 vsum0167a = _mm_add_ps(vsum01a, vsum67);
    314  const __m128 vsum01a = _mm_add_ps(vsum01, vacc);  (local)
    316  const __m128 vsum0167a = _mm_add_ps(vsum01a, vsum67);

  9p8x-minmax-scalar-c1.c  (in xnn_f32_pavgpool_minmax_ukernel_9p8x__scalar_c1)
    167  const float vsum01a = vsum01 + vacc;  (local)
    169  const float vsum0167a = vsum01a + vsum67;
    259  const float vsum01a = vsum01 + vacc;  (local)
    261  const float vsum0167a = vsum01a + vsum67;

  9p8x-minmax-wasm-c1.c  (in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasm_c1)
    167  const float vsum01a = vsum01 + vacc;  (local)
    169  const float vsum0167a = vsum01a + vsum67;
    259  const float vsum01a = vsum01 + vacc;  (local)
    261  const float vsum0167a = vsum01a + vsum67;
/external/XNNPACK/src/f32-avgpool/

  9p8x-minmax-wasmsimd-x86-c4.c  (in xnn_f32_avgpool_minmax_ukernel_9p8x__wasmsimd_x86_c4)
    184  const v128_t vsum01a = wasm_f32x4_add(vsum01, vacc);  (local)
    186  const v128_t vsum0167a = wasm_f32x4_add(vsum01a, vsum67);
    284  const v128_t vsum01a = wasm_f32x4_add(vsum01, vacc);  (local)
    286  const v128_t vsum0167a = wasm_f32x4_add(vsum01a, vsum67);
    313  const v128_t vsum01a = wasm_f32x4_add(vsum01, vacc);  (local)
    315  const v128_t vsum0167a = wasm_f32x4_add(vsum01a, vsum67);

  9p8x-minmax-neon-c4.c  (in xnn_f32_avgpool_minmax_ukernel_9p8x__neon_c4)
    166  const float32x4_t vsum01a = vaddq_f32(vsum01, vacc);  (local)
    168  const float32x4_t vsum0167a = vaddq_f32(vsum01a, vsum67);
    257  const float32x4_t vsum01a = vaddq_f32(vsum01, vacc);  (local)
    259  const float32x4_t vsum0167a = vaddq_f32(vsum01a, vsum67);
    285  const float32x4_t vsum01a = vaddq_f32(vsum01, vacc);  (local)
    287  const float32x4_t vsum0167a = vaddq_f32(vsum01a, vsum67);

  9p8x-minmax-wasmsimd-arm-c4.c  (in xnn_f32_avgpool_minmax_ukernel_9p8x__wasmsimd_arm_c4)
    184  const v128_t vsum01a = wasm_f32x4_add(vsum01, vacc);  (local)
    186  const v128_t vsum0167a = wasm_f32x4_add(vsum01a, vsum67);
    284  const v128_t vsum01a = wasm_f32x4_add(vsum01, vacc);  (local)
    286  const v128_t vsum0167a = wasm_f32x4_add(vsum01a, vsum67);
    313  const v128_t vsum01a = wasm_f32x4_add(vsum01, vacc);  (local)
    315  const v128_t vsum0167a = wasm_f32x4_add(vsum01a, vsum67);

  9p8x-minmax-sse-c4.c  (in xnn_f32_avgpool_minmax_ukernel_9p8x__sse_c4)
    183  const __m128 vsum01a = _mm_add_ps(vsum01, vacc);  (local)
    185  const __m128 vsum0167a = _mm_add_ps(vsum01a, vsum67);
    282  const __m128 vsum01a = _mm_add_ps(vsum01, vacc);  (local)
    284  const __m128 vsum0167a = _mm_add_ps(vsum01a, vsum67);
    311  const __m128 vsum01a = _mm_add_ps(vsum01, vacc);  (local)
    313  const __m128 vsum0167a = _mm_add_ps(vsum01a, vsum67);

  9p8x-minmax-wasm-c1.c  (in xnn_f32_avgpool_minmax_ukernel_9p8x__wasm_c1)
    167  const float vsum01a = vsum01 + vacc;  (local)
    169  const float vsum0167a = vsum01a + vsum67;
    257  const float vsum01a = vsum01 + vacc;  (local)
    259  const float vsum0167a = vsum01a + vsum67;

  9p8x-minmax-scalar-c1.c  (in xnn_f32_avgpool_minmax_ukernel_9p8x__scalar_c1)
    167  const float vsum01a = vsum01 + vacc;  (local)
    169  const float vsum0167a = vsum01a + vsum67;
    257  const float vsum01a = vsum01 + vacc;  (local)
    259  const float vsum0167a = vsum01a + vsum67;
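All of the f32 hits above are the same pairwise summation tree: in the 9p8x kernels (nine rows in the first pass, eight rows in every pass after that), each later pass adds eight input rows to the running sum vacc read back from the pass buffer, and vsum01a is the local where the first pair of rows is folded into that accumulator, which is why it is flagged (local) at several line numbers per file. A minimal scalar sketch of one such step, modeled on the scalar-c1 lines above; the helper name and the grouping of the remaining rows (vsum23, vsum45, vsum2345) are assumptions for illustration, and the channel loop, buffer traffic, scaling, and min/max clamping are omitted:

    /* Hypothetical helper showing one eight-row accumulation step of the
       9p8x average-pooling passes; local names mirror the kernel hits above. */
    static float accumulate_eight_rows(
        const float i0, const float i1, const float i2, const float i3,
        const float i4, const float i5, const float i6, const float i7,
        const float vacc)  /* running sum carried over from the pass buffer */
    {
      const float vsum01 = i0 + i1;
      const float vsum23 = i2 + i3;            /* grouping assumed, not in the hits */
      const float vsum45 = i4 + i5;            /* grouping assumed, not in the hits */
      const float vsum67 = i6 + i7;
      const float vsum01a = vsum01 + vacc;     /* the (local) hits above */
      const float vsum2345 = vsum23 + vsum45;
      const float vsum0167a = vsum01a + vsum67;
      return vsum2345 + vsum0167a;             /* becomes the new vacc */
    }

The SIMD variants follow the same shape, with the scalar additions replaced by wasm_f32x4_add, vaddq_f32, or _mm_add_ps as listed above.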
/external/XNNPACK/src/f16-pavgpool/

  9p8x-minmax-neonfp16arith-c8.c  (in xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8)
    166  const float16x8_t vsum01a = vaddq_f16(vsum01, vacc);  (local)
    168  const float16x8_t vsum0167a = vaddq_f16(vsum01a, vsum67);
    258  const float16x8_t vsum01a = vaddq_f16(vsum01, vacc);  (local)
    260  const float16x8_t vsum0167a = vaddq_f16(vsum01a, vsum67);
    286  const float16x8_t vsum01a = vaddq_f16(vsum01, vacc);  (local)
    288  const float16x8_t vsum0167a = vaddq_f16(vsum01a, vsum67);

  9p8x-minmax-avx2-c8.c  (in xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8)
    186  …const __m256 vsum01a = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum01, vacc), _MM_FROUND_NO_…  (local)
    188  …const __m256 vsum0167a = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum01a, vsum67), _MM_FROUN…
    289  …const __m256 vsum01a = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum01, vacc), _MM_FROUND_NO_…  (local)
    291  …const __m256 vsum0167a = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum01a, vsum67), _MM_FROUN…
    318  …const __m256 vsum01a = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum01, vacc), _MM_FROUND_NO_…  (local)
    320  …const __m256 vsum0167a = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum01a, vsum67), _MM_FROUN…
/external/XNNPACK/src/f16-avgpool/

  9p8x-minmax-neonfp16arith-c8.c  (in xnn_f16_avgpool_minmax_ukernel_9p8x__neonfp16arith_c8)
    166  const float16x8_t vsum01a = vaddq_f16(vsum01, vacc);  (local)
    168  const float16x8_t vsum0167a = vaddq_f16(vsum01a, vsum67);
    257  const float16x8_t vsum01a = vaddq_f16(vsum01, vacc);  (local)
    259  const float16x8_t vsum0167a = vaddq_f16(vsum01a, vsum67);
    285  const float16x8_t vsum01a = vaddq_f16(vsum01, vacc);  (local)
    287  const float16x8_t vsum0167a = vaddq_f16(vsum01a, vsum67);

  9p8x-minmax-f16c-c8.c  (in xnn_f16_avgpool_minmax_ukernel_9p8x__f16c_c8)
    186  …const __m256 vsum01a = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum01, vacc), _MM_FROUND_NO_…  (local)
    188  …const __m256 vsum0167a = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum01a, vsum67), _MM_FROUN…
    287  …const __m256 vsum01a = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum01, vacc), _MM_FROUND_NO_…  (local)
    289  …const __m256 vsum0167a = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum01a, vsum67), _MM_FROUN…
    316  …const __m256 vsum01a = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum01, vacc), _MM_FROUND_NO_…  (local)
    318  …const __m256 vsum0167a = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum01a, vsum67), _MM_FROUN…
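The F16C and AVX2 variants above compute the same partial sums, but every intermediate is rounded back to half precision between additions, which is the _mm256_cvtph_ps(_mm256_cvtps_ph(...)) wrapping visible in the truncated snippets, presumably so intermediate precision matches the native fp16 (neonfp16arith) kernels. A hedged sketch of that round-trip add follows; the rounding-control flags are an assumption, since the indexed lines are cut off at _MM_FROUND_NO_…:

    #include <immintrin.h>

    /* Hypothetical helper showing the fp32 -> fp16 -> fp32 round-trip the
       f16c/avx2 hits above apply to vsum01a and vsum0167a. Requires F16C.
       The rounding flags below are an assumption, not read from the source. */
    static inline __m256 add_with_f16_rounding(const __m256 a, const __m256 b)
    {
      const __m256 vsum = _mm256_add_ps(a, b);                  /* add in fp32 */
      const __m128i vhalf =
          _mm256_cvtps_ph(vsum, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);  /* narrow to fp16 */
      return _mm256_cvtph_ps(vhalf);                            /* widen back for the next add */
    }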
/external/XNNPACK/src/amalgam/

  f16c.c  (in xnn_f16_avgpool_minmax_ukernel_9p8x__f16c_c8)
    195  …const __m256 vsum01a = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum01, vacc), _MM_FROUND_NO_…  (local)
    197  …const __m256 vsum0167a = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum01a, vsum67), _MM_FROUN…
    296  …const __m256 vsum01a = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum01, vacc), _MM_FROUND_NO_…  (local)
    298  …const __m256 vsum0167a = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum01a, vsum67), _MM_FROUN…
    325  …const __m256 vsum01a = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum01, vacc), _MM_FROUND_NO_…  (local)
    327  …const __m256 vsum0167a = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum01a, vsum67), _MM_FROUN…

  sse.c
    (in xnn_f32_avgpool_minmax_ukernel_9p8x__sse_c4)
    201  const __m128 vsum01a = _mm_add_ps(vsum01, vacc);  (local)
    203  const __m128 vsum0167a = _mm_add_ps(vsum01a, vsum67);
    300  const __m128 vsum01a = _mm_add_ps(vsum01, vacc);  (local)
    302  const __m128 vsum0167a = _mm_add_ps(vsum01a, vsum67);
    329  const __m128 vsum01a = _mm_add_ps(vsum01, vacc);  (local)
    331  const __m128 vsum0167a = _mm_add_ps(vsum01a, vsum67);
    (in xnn_f32_pavgpool_minmax_ukernel_9p8x__sse_c4)
    6174  const __m128 vsum01a = _mm_add_ps(vsum01, vacc);  (local)
    6176  const __m128 vsum0167a = _mm_add_ps(vsum01a, vsum67);
    6276  const __m128 vsum01a = _mm_add_ps(vsum01, vacc);  (local)
    6278  const __m128 vsum0167a = _mm_add_ps(vsum01a, vsum67);
    [all …]

  avx2.c  (in xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8)
    786  …const __m256 vsum01a = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum01, vacc), _MM_FROUND_NO_…  (local)
    788  …const __m256 vsum0167a = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum01a, vsum67), _MM_FROUN…
    889  …const __m256 vsum01a = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum01, vacc), _MM_FROUND_NO_…  (local)
    891  …const __m256 vsum0167a = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum01a, vsum67), _MM_FROUN…
    918  …const __m256 vsum01a = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum01, vacc), _MM_FROUND_NO_…  (local)
    920  …const __m256 vsum0167a = _mm256_cvtph_ps(_mm256_cvtps_ph(_mm256_add_ps(vsum01a, vsum67), _MM_FROUN…