/external/XNNPACK/src/f32-argmaxpool/ |
D | 9p8x-scalar-c1.c | 63 const float vi7 = *i7++; in xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1() local 99 if (vi7 > vmax) { in xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1() 100 vmax = vi7; in xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1() 145 const float vi7 = *i7++; in xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1() local 185 if (vi7 > vmax) { in xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1() 186 vmax = vi7; in xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1() 249 const float vi7 = *i7++; in xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1() local 289 if (vi7 > vmax) { in xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1() 290 vmax = vi7; in xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1()
|
D | 9p8x-neon-c4.c | 63 const float32x4_t vi7 = vld1q_f32(i7); i7 += 4; in xnn_f32_argmaxpool_ukernel_9p8x__neon_c4() local 93 const uint32x4_t vm7 = vcgtq_f32(vi7, vmax); in xnn_f32_argmaxpool_ukernel_9p8x__neon_c4() 94 vmax = vbslq_f32(vm7, vi7, vmax); in xnn_f32_argmaxpool_ukernel_9p8x__neon_c4() 139 const float32x4_t vi7 = vld1q_f32(i7); i7 += 4; in xnn_f32_argmaxpool_ukernel_9p8x__neon_c4() local 178 const uint32x4_t vm7 = vcgtq_f32(vi7, vmax); in xnn_f32_argmaxpool_ukernel_9p8x__neon_c4() 180 vmax = vbslq_f32(vm7, vi7, vmax); in xnn_f32_argmaxpool_ukernel_9p8x__neon_c4() 242 const float32x4_t vi7 = vld1q_f32(i7); i7 += 4; in xnn_f32_argmaxpool_ukernel_9p8x__neon_c4() local 281 const uint32x4_t vm7 = vcgtq_f32(vi7, vmax); in xnn_f32_argmaxpool_ukernel_9p8x__neon_c4() 283 vmax = vbslq_f32(vm7, vi7, vmax); in xnn_f32_argmaxpool_ukernel_9p8x__neon_c4() 297 const float32x4_t vi7 = vld1q_f32(i7); in xnn_f32_argmaxpool_ukernel_9p8x__neon_c4() local [all …]
|
D | 9p8x-wasmsimd-c4.c | 70 const v128_t vi7 = wasm_v128_load(i7); in xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4() local 102 const v128_t vm7 = wasm_f32x4_gt(vi7, vmax); in xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4() 103 vmax = wasm_v128_bitselect(vi7, vmax, vm7); in xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4() 157 const v128_t vi7 = wasm_v128_load(i7); in xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4() local 197 const v128_t vm7 = wasm_f32x4_gt(vi7, vmax); in xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4() 199 vmax = wasm_v128_bitselect(vi7, vmax, vm7); in xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4() 270 const v128_t vi7 = wasm_v128_load(i7); in xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4() local 312 const v128_t vm7 = wasm_f32x4_gt(vi7, vmax); in xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4() 314 vmax = wasm_v128_bitselect(vi7, vmax, vm7); in xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4() 330 const v128_t vi7 = wasm_v128_load(i7); in xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4() local [all …]
|
D | 9x-wasmsimd-c4.c | 90 const v128_t vi7 = wasm_v128_load(i7); in xnn_f32_argmaxpool_ukernel_9x__wasmsimd_c4() local 122 const v128_t vm7 = wasm_f32x4_gt(vi7, vmax); in xnn_f32_argmaxpool_ukernel_9x__wasmsimd_c4() 123 vmax = wasm_v128_bitselect(vi7, vmax, vm7); in xnn_f32_argmaxpool_ukernel_9x__wasmsimd_c4() 143 const v128_t vi7 = wasm_v128_load(i7); in xnn_f32_argmaxpool_ukernel_9x__wasmsimd_c4() local 173 const v128_t vm7 = wasm_f32x4_gt(vi7, vmax); in xnn_f32_argmaxpool_ukernel_9x__wasmsimd_c4() 174 vmax = wasm_v128_bitselect(vi7, vmax, vm7); in xnn_f32_argmaxpool_ukernel_9x__wasmsimd_c4()
|
D | 9x-neon-c4.c | 82 const float32x4_t vi7 = vld1q_f32(i7); i7 += 4; in xnn_f32_argmaxpool_ukernel_9x__neon_c4() local 112 const uint32x4_t vm7 = vcgtq_f32(vi7, vmax); in xnn_f32_argmaxpool_ukernel_9x__neon_c4() 113 vmax = vbslq_f32(vm7, vi7, vmax); in xnn_f32_argmaxpool_ukernel_9x__neon_c4() 131 const float32x4_t vi7 = vld1q_f32(i7); in xnn_f32_argmaxpool_ukernel_9x__neon_c4() local 161 const uint32x4_t vm7 = vcgtq_f32(vi7, vmax); in xnn_f32_argmaxpool_ukernel_9x__neon_c4() 162 vmax = vbslq_f32(vm7, vi7, vmax); in xnn_f32_argmaxpool_ukernel_9x__neon_c4()
|
D | 9p8x-sse2-c4.c | 70 const __m128 vi7 = _mm_loadu_ps(i7); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4() local 102 const __m128i vm7 = _mm_castps_si128(_mm_cmpgt_ps(vi7, vmax)); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4() 103 vmax = _mm_max_ps(vi7, vmax); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4() 157 const __m128 vi7 = _mm_loadu_ps(i7); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4() local 197 const __m128i vm7 = _mm_castps_si128(_mm_cmpgt_ps(vi7, vmax)); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4() 199 vmax = _mm_max_ps(vi7, vmax); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4() 270 const __m128 vi7 = _mm_loadu_ps(i7); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4() local 312 const __m128i vm7 = _mm_castps_si128(_mm_cmpgt_ps(vi7, vmax)); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4() 314 vmax = _mm_max_ps(vi7, vmax); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4() 330 const __m128 vi7 = _mm_loadu_ps(i7); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4() local [all …]
|
D | 9x-sse2-c4.c | 89 const __m128 vi7 = _mm_loadu_ps(i7); in xnn_f32_argmaxpool_ukernel_9x__sse2_c4() local 121 const __m128i vm7 = _mm_castps_si128(_mm_cmpgt_ps(vi7, vmax)); in xnn_f32_argmaxpool_ukernel_9x__sse2_c4() 122 vmax = _mm_max_ps(vi7, vmax); in xnn_f32_argmaxpool_ukernel_9x__sse2_c4() 142 const __m128 vi7 = _mm_loadu_ps(i7); in xnn_f32_argmaxpool_ukernel_9x__sse2_c4() local 172 const __m128i vm7 = _mm_castps_si128(_mm_cmpgt_ps(vi7, vmax)); in xnn_f32_argmaxpool_ukernel_9x__sse2_c4() 173 vmax = _mm_max_ps(vi7, vmax); in xnn_f32_argmaxpool_ukernel_9x__sse2_c4()
|
/external/XNNPACK/src/f32-maxpool/ |
D | 9p8x-minmax-wasmsimd-x86-c4.c | 92 const v128_t vi7 = wasm_v128_load(i7); in xnn_f32_maxpool_minmax_ukernel_9p8x__wasmsimd_x86_c4() local 101 const v128_t vmax67 = wasm_v128_bitselect(vi7, vi6, wasm_f32x4_lt(vi6, vi7)); in xnn_f32_maxpool_minmax_ukernel_9p8x__wasmsimd_x86_c4() 131 const v128_t vi7 = wasm_v128_load(i7); in xnn_f32_maxpool_minmax_ukernel_9p8x__wasmsimd_x86_c4() local 140 const v128_t vmax67 = wasm_v128_bitselect(vi7, vi6, wasm_f32x4_lt(vi6, vi7)); in xnn_f32_maxpool_minmax_ukernel_9p8x__wasmsimd_x86_c4() 219 const v128_t vi7 = wasm_v128_load(i7); in xnn_f32_maxpool_minmax_ukernel_9p8x__wasmsimd_x86_c4() local 227 const v128_t vmax67 = wasm_v128_bitselect(vi7, vi6, wasm_f32x4_lt(vi6, vi7)); in xnn_f32_maxpool_minmax_ukernel_9p8x__wasmsimd_x86_c4() 250 const v128_t vi7 = wasm_v128_load(i7); in xnn_f32_maxpool_minmax_ukernel_9p8x__wasmsimd_x86_c4() local 257 const v128_t vmax67 = wasm_v128_bitselect(vi7, vi6, wasm_f32x4_lt(vi6, vi7)); in xnn_f32_maxpool_minmax_ukernel_9p8x__wasmsimd_x86_c4()
|
D | 9p8x-minmax-sse-c4.c | 92 const __m128 vi7 = _mm_loadu_ps(i7); in xnn_f32_maxpool_minmax_ukernel_9p8x__sse_c4() local 100 const __m128 vmax67 = _mm_max_ps(vi6, vi7); in xnn_f32_maxpool_minmax_ukernel_9p8x__sse_c4() 125 const __m128 vi7 = _mm_loadu_ps(i7); in xnn_f32_maxpool_minmax_ukernel_9p8x__sse_c4() local 133 const __m128 vmax67 = _mm_max_ps(vi6, vi7); in xnn_f32_maxpool_minmax_ukernel_9p8x__sse_c4() 208 const __m128 vi7 = _mm_loadu_ps(i7); in xnn_f32_maxpool_minmax_ukernel_9p8x__sse_c4() local 215 const __m128 vmax67 = _mm_max_ps(vi6, vi7); in xnn_f32_maxpool_minmax_ukernel_9p8x__sse_c4() 233 const __m128 vi7 = _mm_loadu_ps(i7); in xnn_f32_maxpool_minmax_ukernel_9p8x__sse_c4() local 239 const __m128 vmax67 = _mm_max_ps(vi6, vi7); in xnn_f32_maxpool_minmax_ukernel_9p8x__sse_c4()
|
D | 9p8x-minmax-wasmsimd-arm-c4.c | 92 const v128_t vi7 = wasm_v128_load(i7); in xnn_f32_maxpool_minmax_ukernel_9p8x__wasmsimd_arm_c4() local 100 const v128_t vmax67 = wasm_f32x4_max(vi6, vi7); in xnn_f32_maxpool_minmax_ukernel_9p8x__wasmsimd_arm_c4() 125 const v128_t vi7 = wasm_v128_load(i7); in xnn_f32_maxpool_minmax_ukernel_9p8x__wasmsimd_arm_c4() local 133 const v128_t vmax67 = wasm_f32x4_max(vi6, vi7); in xnn_f32_maxpool_minmax_ukernel_9p8x__wasmsimd_arm_c4() 207 const v128_t vi7 = wasm_v128_load(i7); in xnn_f32_maxpool_minmax_ukernel_9p8x__wasmsimd_arm_c4() local 214 const v128_t vmax67 = wasm_f32x4_max(vi6, vi7); in xnn_f32_maxpool_minmax_ukernel_9p8x__wasmsimd_arm_c4() 232 const v128_t vi7 = wasm_v128_load(i7); in xnn_f32_maxpool_minmax_ukernel_9p8x__wasmsimd_arm_c4() local 238 const v128_t vmax67 = wasm_f32x4_max(vi6, vi7); in xnn_f32_maxpool_minmax_ukernel_9p8x__wasmsimd_arm_c4()
|
D | 9p8x-minmax-neon-c4.c | 92 const float32x4_t vi7 = vld1q_f32(i7); in xnn_f32_maxpool_minmax_ukernel_9p8x__neon_c4() local 100 const float32x4_t vmax67 = vmaxq_f32(vi6, vi7); in xnn_f32_maxpool_minmax_ukernel_9p8x__neon_c4() 125 const float32x4_t vi7 = vld1q_f32(i7); in xnn_f32_maxpool_minmax_ukernel_9p8x__neon_c4() local 133 const float32x4_t vmax67 = vmaxq_f32(vi6, vi7); in xnn_f32_maxpool_minmax_ukernel_9p8x__neon_c4() 209 const float32x4_t vi7 = vld1q_f32(i7); in xnn_f32_maxpool_minmax_ukernel_9p8x__neon_c4() local 216 const float32x4_t vmax67 = vmaxq_f32(vi6, vi7); in xnn_f32_maxpool_minmax_ukernel_9p8x__neon_c4() 234 const float32x4_t vi7 = vld1q_f32(i7); in xnn_f32_maxpool_minmax_ukernel_9p8x__neon_c4() local 240 const float32x4_t vmax67 = vmaxq_f32(vi6, vi7); in xnn_f32_maxpool_minmax_ukernel_9p8x__neon_c4()
|
/external/XNNPACK/src/u8-maxpool/ |
D | 9p8x-minmax-scalar-c1.c | 83 const uint8_t vi7 = *i7++; in xnn_u8_maxpool_minmax_ukernel_9p8x__scalar_c1() local 89 const uint8_t vmax67 = vi6 > vi7 ? vi6 : vi7; in xnn_u8_maxpool_minmax_ukernel_9p8x__scalar_c1() 152 const uint8_t vi7 = *i7++; in xnn_u8_maxpool_minmax_ukernel_9p8x__scalar_c1() local 158 const uint8_t vmax67 = vi6 > vi7 ? vi6 : vi7; in xnn_u8_maxpool_minmax_ukernel_9p8x__scalar_c1()
|
D | 9p8x-minmax-sse2-c16.c | 89 const __m128i vi7 = _mm_loadu_si128((const __m128i*) i7); i7 += 16; in xnn_u8_maxpool_minmax_ukernel_9p8x__sse2_c16() local 95 const __m128i vmax67 = _mm_max_epu8(vi6, vi7); in xnn_u8_maxpool_minmax_ukernel_9p8x__sse2_c16() 112 const __m128i vi7 = _mm_loadu_si128((const __m128i*) i7); in xnn_u8_maxpool_minmax_ukernel_9p8x__sse2_c16() local 118 const __m128i vmax67 = _mm_max_epu8(vi6, vi7); in xnn_u8_maxpool_minmax_ukernel_9p8x__sse2_c16() 196 const __m128i vi7 = _mm_loadu_si128((const __m128i*) i7); i7 += 16; in xnn_u8_maxpool_minmax_ukernel_9p8x__sse2_c16() local 202 const __m128i vmax67 = _mm_max_epu8(vi6, vi7); in xnn_u8_maxpool_minmax_ukernel_9p8x__sse2_c16() 220 const __m128i vi7 = _mm_loadu_si128((const __m128i*) i7); in xnn_u8_maxpool_minmax_ukernel_9p8x__sse2_c16() local 226 const __m128i vmax67 = _mm_max_epu8(vi6, vi7); in xnn_u8_maxpool_minmax_ukernel_9p8x__sse2_c16()
|
D | 9p8x-minmax-neon-c16.c | 88 const uint8x16_t vi7 = vld1q_u8(i7); i7 += 16; in xnn_u8_maxpool_minmax_ukernel_9p8x__neon_c16() local 94 const uint8x16_t vmax67 = vmaxq_u8(vi6, vi7); in xnn_u8_maxpool_minmax_ukernel_9p8x__neon_c16() 111 const uint8x16_t vi7 = vld1q_u8(i7); in xnn_u8_maxpool_minmax_ukernel_9p8x__neon_c16() local 117 const uint8x16_t vmax67 = vmaxq_u8(vi6, vi7); in xnn_u8_maxpool_minmax_ukernel_9p8x__neon_c16() 192 const uint8x16_t vi7 = vld1q_u8(i7); i7 += 16; in xnn_u8_maxpool_minmax_ukernel_9p8x__neon_c16() local 198 const uint8x16_t vmax67 = vmaxq_u8(vi6, vi7); in xnn_u8_maxpool_minmax_ukernel_9p8x__neon_c16() 215 const uint8x16_t vi7 = vld1q_u8(i7); in xnn_u8_maxpool_minmax_ukernel_9p8x__neon_c16() local 221 const uint8x16_t vmax67 = vmaxq_u8(vi6, vi7); in xnn_u8_maxpool_minmax_ukernel_9p8x__neon_c16()
|
/external/XNNPACK/src/f32-pavgpool/ |
D | 9p8x-minmax-sse-c4.c | 98 const __m128 vi7 = _mm_loadu_ps(i7); in xnn_f32_pavgpool_minmax_ukernel_9p8x__sse_c4() local 106 const __m128 vsum67 = _mm_add_ps(vi6, vi7); in xnn_f32_pavgpool_minmax_ukernel_9p8x__sse_c4() 175 const __m128 vi7 = _mm_loadu_ps(i7); in xnn_f32_pavgpool_minmax_ukernel_9p8x__sse_c4() local 182 const __m128 vsum67 = _mm_add_ps(vi6, vi7); in xnn_f32_pavgpool_minmax_ukernel_9p8x__sse_c4() 276 const __m128 vi7 = _mm_loadu_ps(i7); in xnn_f32_pavgpool_minmax_ukernel_9p8x__sse_c4() local 284 const __m128 vsum67 = _mm_add_ps(vi6, vi7); in xnn_f32_pavgpool_minmax_ukernel_9p8x__sse_c4() 307 const __m128 vi7 = _mm_loadu_ps(i7); in xnn_f32_pavgpool_minmax_ukernel_9p8x__sse_c4() local 313 const __m128 vsum67 = _mm_add_ps(vi6, vi7); in xnn_f32_pavgpool_minmax_ukernel_9p8x__sse_c4()
|
D | 9p8x-minmax-wasmsimd-arm-c4.c | 98 const v128_t vi7 = wasm_v128_load(i7); in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasmsimd_arm_c4() local 106 const v128_t vsum67 = wasm_f32x4_add(vi6, vi7); in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasmsimd_arm_c4() 176 const v128_t vi7 = wasm_v128_load(i7); in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasmsimd_arm_c4() local 183 const v128_t vsum67 = wasm_f32x4_add(vi6, vi7); in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasmsimd_arm_c4() 278 const v128_t vi7 = wasm_v128_load(i7); in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasmsimd_arm_c4() local 286 const v128_t vsum67 = wasm_f32x4_add(vi6, vi7); in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasmsimd_arm_c4() 309 const v128_t vi7 = wasm_v128_load(i7); in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasmsimd_arm_c4() local 315 const v128_t vsum67 = wasm_f32x4_add(vi6, vi7); in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasmsimd_arm_c4()
|
D | 9p8x-minmax-neon-c4.c | 91 const float32x4_t vi7 = vld1q_f32(i7); i7 += 4; in xnn_f32_pavgpool_minmax_ukernel_9p8x__neon_c4() local 97 const float32x4_t vsum67 = vaddq_f32(vi6, vi7); in xnn_f32_pavgpool_minmax_ukernel_9p8x__neon_c4() 159 const float32x4_t vi7 = vld1q_f32(i7); i7 += 4; in xnn_f32_pavgpool_minmax_ukernel_9p8x__neon_c4() local 165 const float32x4_t vsum67 = vaddq_f32(vi6, vi7); in xnn_f32_pavgpool_minmax_ukernel_9p8x__neon_c4() 251 const float32x4_t vi7 = vld1q_f32(i7); i7 += 4; in xnn_f32_pavgpool_minmax_ukernel_9p8x__neon_c4() local 257 const float32x4_t vsum67 = vaddq_f32(vi6, vi7); in xnn_f32_pavgpool_minmax_ukernel_9p8x__neon_c4() 279 const float32x4_t vi7 = vld1q_f32(i7); in xnn_f32_pavgpool_minmax_ukernel_9p8x__neon_c4() local 285 const float32x4_t vsum67 = vaddq_f32(vi6, vi7); in xnn_f32_pavgpool_minmax_ukernel_9p8x__neon_c4()
|
D | 9p8x-minmax-wasm-c1.c | 91 const float vi7 = *i7++; in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasm_c1() local 97 const float vsum67 = vi6 + vi7; in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasm_c1() 160 const float vi7 = *i7++; in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasm_c1() local 166 const float vsum67 = vi6 + vi7; in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasm_c1() 252 const float vi7 = *i7++; in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasm_c1() local 258 const float vsum67 = vi6 + vi7; in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasm_c1()
|
D | 9p8x-minmax-scalar-c1.c | 91 const float vi7 = *i7++; in xnn_f32_pavgpool_minmax_ukernel_9p8x__scalar_c1() local 97 const float vsum67 = vi6 + vi7; in xnn_f32_pavgpool_minmax_ukernel_9p8x__scalar_c1() 160 const float vi7 = *i7++; in xnn_f32_pavgpool_minmax_ukernel_9p8x__scalar_c1() local 166 const float vsum67 = vi6 + vi7; in xnn_f32_pavgpool_minmax_ukernel_9p8x__scalar_c1() 252 const float vi7 = *i7++; in xnn_f32_pavgpool_minmax_ukernel_9p8x__scalar_c1() local 258 const float vsum67 = vi6 + vi7; in xnn_f32_pavgpool_minmax_ukernel_9p8x__scalar_c1()
|
D | 9p8x-minmax-wasmsimd-x86-c4.c | 98 const v128_t vi7 = wasm_v128_load(i7); in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasmsimd_x86_c4() local 106 const v128_t vsum67 = wasm_f32x4_add(vi6, vi7); in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasmsimd_x86_c4() 176 const v128_t vi7 = wasm_v128_load(i7); in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasmsimd_x86_c4() local 183 const v128_t vsum67 = wasm_f32x4_add(vi6, vi7); in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasmsimd_x86_c4() 278 const v128_t vi7 = wasm_v128_load(i7); in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasmsimd_x86_c4() local 286 const v128_t vsum67 = wasm_f32x4_add(vi6, vi7); in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasmsimd_x86_c4() 311 const v128_t vi7 = wasm_v128_load(i7); in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasmsimd_x86_c4() local 317 const v128_t vsum67 = wasm_f32x4_add(vi6, vi7); in xnn_f32_pavgpool_minmax_ukernel_9p8x__wasmsimd_x86_c4()
|
/external/XNNPACK/src/f32-avgpool/ |
D | 9p8x-minmax-neon-c4.c | 91 const float32x4_t vi7 = vld1q_f32(i7); i7 += 4; in xnn_f32_avgpool_minmax_ukernel_9p8x__neon_c4() local 97 const float32x4_t vsum67 = vaddq_f32(vi6, vi7); in xnn_f32_avgpool_minmax_ukernel_9p8x__neon_c4() 159 const float32x4_t vi7 = vld1q_f32(i7); i7 += 4; in xnn_f32_avgpool_minmax_ukernel_9p8x__neon_c4() local 165 const float32x4_t vsum67 = vaddq_f32(vi6, vi7); in xnn_f32_avgpool_minmax_ukernel_9p8x__neon_c4() 250 const float32x4_t vi7 = vld1q_f32(i7); i7 += 4; in xnn_f32_avgpool_minmax_ukernel_9p8x__neon_c4() local 256 const float32x4_t vsum67 = vaddq_f32(vi6, vi7); in xnn_f32_avgpool_minmax_ukernel_9p8x__neon_c4() 278 const float32x4_t vi7 = vld1q_f32(i7); in xnn_f32_avgpool_minmax_ukernel_9p8x__neon_c4() local 284 const float32x4_t vsum67 = vaddq_f32(vi6, vi7); in xnn_f32_avgpool_minmax_ukernel_9p8x__neon_c4()
|
D | 9p8x-minmax-wasmsimd-arm-c4.c | 98 const v128_t vi7 = wasm_v128_load(i7); in xnn_f32_avgpool_minmax_ukernel_9p8x__wasmsimd_arm_c4() local 106 const v128_t vsum67 = wasm_f32x4_add(vi6, vi7); in xnn_f32_avgpool_minmax_ukernel_9p8x__wasmsimd_arm_c4() 176 const v128_t vi7 = wasm_v128_load(i7); in xnn_f32_avgpool_minmax_ukernel_9p8x__wasmsimd_arm_c4() local 183 const v128_t vsum67 = wasm_f32x4_add(vi6, vi7); in xnn_f32_avgpool_minmax_ukernel_9p8x__wasmsimd_arm_c4() 275 const v128_t vi7 = wasm_v128_load(i7); in xnn_f32_avgpool_minmax_ukernel_9p8x__wasmsimd_arm_c4() local 283 const v128_t vsum67 = wasm_f32x4_add(vi6, vi7); in xnn_f32_avgpool_minmax_ukernel_9p8x__wasmsimd_arm_c4() 306 const v128_t vi7 = wasm_v128_load(i7); in xnn_f32_avgpool_minmax_ukernel_9p8x__wasmsimd_arm_c4() local 312 const v128_t vsum67 = wasm_f32x4_add(vi6, vi7); in xnn_f32_avgpool_minmax_ukernel_9p8x__wasmsimd_arm_c4()
|
D | 9p8x-minmax-sse-c4.c | 98 const __m128 vi7 = _mm_loadu_ps(i7); in xnn_f32_avgpool_minmax_ukernel_9p8x__sse_c4() local 106 const __m128 vsum67 = _mm_add_ps(vi6, vi7); in xnn_f32_avgpool_minmax_ukernel_9p8x__sse_c4() 175 const __m128 vi7 = _mm_loadu_ps(i7); in xnn_f32_avgpool_minmax_ukernel_9p8x__sse_c4() local 182 const __m128 vsum67 = _mm_add_ps(vi6, vi7); in xnn_f32_avgpool_minmax_ukernel_9p8x__sse_c4() 273 const __m128 vi7 = _mm_loadu_ps(i7); in xnn_f32_avgpool_minmax_ukernel_9p8x__sse_c4() local 281 const __m128 vsum67 = _mm_add_ps(vi6, vi7); in xnn_f32_avgpool_minmax_ukernel_9p8x__sse_c4() 304 const __m128 vi7 = _mm_loadu_ps(i7); in xnn_f32_avgpool_minmax_ukernel_9p8x__sse_c4() local 310 const __m128 vsum67 = _mm_add_ps(vi6, vi7); in xnn_f32_avgpool_minmax_ukernel_9p8x__sse_c4()
|
D | 9p8x-minmax-wasm-c1.c | 91 const float vi7 = *i7++; in xnn_f32_avgpool_minmax_ukernel_9p8x__wasm_c1() local 97 const float vsum67 = vi6 + vi7; in xnn_f32_avgpool_minmax_ukernel_9p8x__wasm_c1() 160 const float vi7 = *i7++; in xnn_f32_avgpool_minmax_ukernel_9p8x__wasm_c1() local 166 const float vsum67 = vi6 + vi7; in xnn_f32_avgpool_minmax_ukernel_9p8x__wasm_c1() 250 const float vi7 = *i7++; in xnn_f32_avgpool_minmax_ukernel_9p8x__wasm_c1() local 256 const float vsum67 = vi6 + vi7; in xnn_f32_avgpool_minmax_ukernel_9p8x__wasm_c1()
|
D | 9p8x-minmax-scalar-c1.c | 91 const float vi7 = *i7++; in xnn_f32_avgpool_minmax_ukernel_9p8x__scalar_c1() local 97 const float vsum67 = vi6 + vi7; in xnn_f32_avgpool_minmax_ukernel_9p8x__scalar_c1() 160 const float vi7 = *i7++; in xnn_f32_avgpool_minmax_ukernel_9p8x__scalar_c1() local 166 const float vsum67 = vi6 + vi7; in xnn_f32_avgpool_minmax_ukernel_9p8x__scalar_c1() 250 const float vi7 = *i7++; in xnn_f32_avgpool_minmax_ukernel_9p8x__scalar_c1() local 256 const float vsum67 = vi6 + vi7; in xnn_f32_avgpool_minmax_ukernel_9p8x__scalar_c1()
|