/external/XNNPACK/src/f32-prelu/gen/ |
D | avx512f-2x32.c | 82 const __m512 vi1 = _mm512_loadu_ps(i1); in xnn_f32_prelu_ukernel__avx512f_2x32() local 87 const __mmask16 vsign1 = _mm512_cmp_ps_mask(vi1, vzero, _CMP_LT_OQ); in xnn_f32_prelu_ukernel__avx512f_2x32() 88 const __m512 vacc1 = _mm512_mask_mul_ps(vi1, vsign1, vi1, vw); in xnn_f32_prelu_ukernel__avx512f_2x32() 105 const __m512 vi1 = _mm512_maskz_loadu_ps(vmask, i1); in xnn_f32_prelu_ukernel__avx512f_2x32() local 110 const __mmask16 vsign1 = _mm512_cmp_ps_mask(vi1, vzero, _CMP_LT_OQ); in xnn_f32_prelu_ukernel__avx512f_2x32() 111 const __m512 vacc1 = _mm512_mask_mul_ps(vi1, vsign1, vi1, vw); in xnn_f32_prelu_ukernel__avx512f_2x32()
|
D | wasm-2x1.c | 49 float vi1 = *i1++; in xnn_f32_prelu_ukernel__wasm_2x1() local 53 float vacc1 = __builtin_wasm_max_f32(vi1, vzero); in xnn_f32_prelu_ukernel__wasm_2x1() 54 vi1 = __builtin_wasm_min_f32(vi1, vzero); in xnn_f32_prelu_ukernel__wasm_2x1() 57 vacc1 += vi1 * vw; in xnn_f32_prelu_ukernel__wasm_2x1()
|
D | avx-2x16.c | 83 const __m256 vi1 = _mm256_loadu_ps(i1); in xnn_f32_prelu_ukernel__avx_2x16() local 87 const __m256 vprod1 = _mm256_mul_ps(vi1, vw); in xnn_f32_prelu_ukernel__avx_2x16() 90 const __m256 vacc1 = _mm256_blendv_ps(vi1, vprod1, vi1); in xnn_f32_prelu_ukernel__avx_2x16() 106 const __m256 vi1 = _mm256_maskload_ps(i1, vmask); in xnn_f32_prelu_ukernel__avx_2x16() local 110 const __m256 vprod1 = _mm256_mul_ps(vi1, vw); in xnn_f32_prelu_ukernel__avx_2x16() 113 __m256 vacc1 = _mm256_blendv_ps(vi1, vprod1, vi1); in xnn_f32_prelu_ukernel__avx_2x16()
|
D | scalar-2x1.c | 48 const float vi1 = *i1++; in xnn_f32_prelu_ukernel__scalar_2x1() local 51 const float vacc1 = XNN_UNPREDICTABLE(vi1 < 0.0f) ? vi1 * vw : vi1; in xnn_f32_prelu_ukernel__scalar_2x1()
|
D | wasm-2x4.c | 105 float vi1 = *i1++; in xnn_f32_prelu_ukernel__wasm_2x4() local 109 float vacc1 = __builtin_wasm_max_f32(vi1, vzero); in xnn_f32_prelu_ukernel__wasm_2x4() 110 vi1 = __builtin_wasm_min_f32(vi1, vzero); in xnn_f32_prelu_ukernel__wasm_2x4() 113 vacc1 += vi1 * vw; in xnn_f32_prelu_ukernel__wasm_2x4()
|
/external/XNNPACK/src/f32-argmaxpool/ |
D | 9p8x-scalar-c1.c | 57 const float vi1 = *i1++; in xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1() local 69 if (vi1 > vmax) { in xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1() 70 vmax = vi1; in xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1() 139 const float vi1 = *i1++; in xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1() local 155 if (vi1 > vmax) { in xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1() 156 vmax = vi1; in xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1() 243 const float vi1 = *i1++; in xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1() local 259 if (vi1 > vmax) { in xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1() 260 vmax = vi1; in xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1()
|
D | 4x-wasmsimd-c4.c | 53 const v128_t vi1 = wasm_v128_load(i1); in xnn_f32_argmaxpool_ukernel_4x__wasmsimd_c4() local 63 const v128_t vm1 = wasm_f32x4_gt(vi1, vmax); in xnn_f32_argmaxpool_ukernel_4x__wasmsimd_c4() 64 vmax = wasm_v128_bitselect(vi1, vmax, vm1); in xnn_f32_argmaxpool_ukernel_4x__wasmsimd_c4() 82 const v128_t vi1 = wasm_v128_load(i1); in xnn_f32_argmaxpool_ukernel_4x__wasmsimd_c4() local 89 const v128_t vm1 = wasm_f32x4_gt(vi1, vmax); in xnn_f32_argmaxpool_ukernel_4x__wasmsimd_c4() 90 vmax = wasm_v128_bitselect(vi1, vmax, vm1); in xnn_f32_argmaxpool_ukernel_4x__wasmsimd_c4()
|
D | 4x-neon-c4.c | 51 const float32x4_t vi1 = vld1q_f32(i1); i1 += 4; in xnn_f32_argmaxpool_ukernel_4x__neon_c4() local 58 const uint32x4_t vm1 = vcgtq_f32(vi1, vmax); in xnn_f32_argmaxpool_ukernel_4x__neon_c4() 59 vmax = vbslq_f32(vm1, vi1, vmax); in xnn_f32_argmaxpool_ukernel_4x__neon_c4() 75 const float32x4_t vi1 = vld1q_f32(i1); in xnn_f32_argmaxpool_ukernel_4x__neon_c4() local 82 const uint32x4_t vm1 = vcgtq_f32(vi1, vmax); in xnn_f32_argmaxpool_ukernel_4x__neon_c4() 83 vmax = vbslq_f32(vm1, vi1, vmax); in xnn_f32_argmaxpool_ukernel_4x__neon_c4()
|
D | 4x-sse2-c4.c | 52 const __m128 vi1 = _mm_loadu_ps(i1); in xnn_f32_argmaxpool_ukernel_4x__sse2_c4() local 62 const __m128i vm1 = _mm_castps_si128(_mm_cmpgt_ps(vi1, vmax)); in xnn_f32_argmaxpool_ukernel_4x__sse2_c4() 63 vmax = _mm_max_ps(vi1, vmax); in xnn_f32_argmaxpool_ukernel_4x__sse2_c4() 81 const __m128 vi1 = _mm_loadu_ps(i1); in xnn_f32_argmaxpool_ukernel_4x__sse2_c4() local 88 const __m128i vm1 = _mm_castps_si128(_mm_cmpgt_ps(vi1, vmax)); in xnn_f32_argmaxpool_ukernel_4x__sse2_c4() 89 vmax = _mm_max_ps(vi1, vmax); in xnn_f32_argmaxpool_ukernel_4x__sse2_c4()
|
/external/XNNPACK/src/f32-spmm/gen/ |
D | 8x1-minmax-scalar-pipelined.c | 41 float vi1 = input[1]; in xnn_f32_spmm_minmax_ukernel_8x1__scalar_pipelined() local 63 vacc1 += vi1 * vw; in xnn_f32_spmm_minmax_ukernel_8x1__scalar_pipelined() 75 vi1 = input[1]; in xnn_f32_spmm_minmax_ukernel_8x1__scalar_pipelined() 123 float vi1 = input[1]; in xnn_f32_spmm_minmax_ukernel_8x1__scalar_pipelined() local 137 vacc1 += vi1 * vw; in xnn_f32_spmm_minmax_ukernel_8x1__scalar_pipelined() 145 vi1 = input[1]; in xnn_f32_spmm_minmax_ukernel_8x1__scalar_pipelined() 175 float vi1 = input[1]; in xnn_f32_spmm_minmax_ukernel_8x1__scalar_pipelined() local 185 vacc1 += vi1 * vw; in xnn_f32_spmm_minmax_ukernel_8x1__scalar_pipelined() 191 vi1 = input[1]; in xnn_f32_spmm_minmax_ukernel_8x1__scalar_pipelined()
|
D | 4x1-minmax-scalar-pipelined.c | 41 float vi1 = input[1]; in xnn_f32_spmm_minmax_ukernel_4x1__scalar_pipelined() local 55 vacc1 += vi1 * vw; in xnn_f32_spmm_minmax_ukernel_4x1__scalar_pipelined() 63 vi1 = input[1]; in xnn_f32_spmm_minmax_ukernel_4x1__scalar_pipelined() 95 float vi1 = input[1]; in xnn_f32_spmm_minmax_ukernel_4x1__scalar_pipelined() local 105 vacc1 += vi1 * vw; in xnn_f32_spmm_minmax_ukernel_4x1__scalar_pipelined() 111 vi1 = input[1]; in xnn_f32_spmm_minmax_ukernel_4x1__scalar_pipelined()
|
D | 8x1-minmax-scalar.c | 53 const float vi1 = input[1]; in xnn_f32_spmm_minmax_ukernel_8x1__scalar() local 63 vacc1x0 += vi1 * vw0; in xnn_f32_spmm_minmax_ukernel_8x1__scalar() 122 const float vi1 = input[1]; in xnn_f32_spmm_minmax_ukernel_8x1__scalar() local 132 vacc1 += vi1 * vw; in xnn_f32_spmm_minmax_ukernel_8x1__scalar() 190 const float vi1 = input[1]; in xnn_f32_spmm_minmax_ukernel_8x1__scalar() local 196 vacc1x0 += vi1 * vw0; in xnn_f32_spmm_minmax_ukernel_8x1__scalar() 227 const float vi1 = input[1]; in xnn_f32_spmm_minmax_ukernel_8x1__scalar() local 233 vacc1 += vi1 * vw; in xnn_f32_spmm_minmax_ukernel_8x1__scalar() 271 const float vi1 = input[1]; in xnn_f32_spmm_minmax_ukernel_8x1__scalar() local 275 vacc1x0 += vi1 * vw0; in xnn_f32_spmm_minmax_ukernel_8x1__scalar() [all …]
|
D | 8x2-minmax-scalar.c | 61 const float vi1 = input[1]; in xnn_f32_spmm_minmax_ukernel_8x2__scalar() local 72 vacc1x0 += vi1 * vw0; in xnn_f32_spmm_minmax_ukernel_8x2__scalar() 80 vacc1x1 += vi1 * vw1; in xnn_f32_spmm_minmax_ukernel_8x2__scalar() 164 const float vi1 = input[1]; in xnn_f32_spmm_minmax_ukernel_8x2__scalar() local 174 vacc1 += vi1 * vw; in xnn_f32_spmm_minmax_ukernel_8x2__scalar() 236 const float vi1 = input[1]; in xnn_f32_spmm_minmax_ukernel_8x2__scalar() local 243 vacc1x0 += vi1 * vw0; in xnn_f32_spmm_minmax_ukernel_8x2__scalar() 247 vacc1x1 += vi1 * vw1; in xnn_f32_spmm_minmax_ukernel_8x2__scalar() 291 const float vi1 = input[1]; in xnn_f32_spmm_minmax_ukernel_8x2__scalar() local 297 vacc1 += vi1 * vw; in xnn_f32_spmm_minmax_ukernel_8x2__scalar() [all …]
|
D | 8x4-minmax-scalar.c | 77 const float vi1 = input[1]; in xnn_f32_spmm_minmax_ukernel_8x4__scalar() local 90 vacc1x0 += vi1 * vw0; in xnn_f32_spmm_minmax_ukernel_8x4__scalar() 98 vacc1x1 += vi1 * vw1; in xnn_f32_spmm_minmax_ukernel_8x4__scalar() 106 vacc1x2 += vi1 * vw2; in xnn_f32_spmm_minmax_ukernel_8x4__scalar() 114 vacc1x3 += vi1 * vw3; in xnn_f32_spmm_minmax_ukernel_8x4__scalar() 248 const float vi1 = input[1]; in xnn_f32_spmm_minmax_ukernel_8x4__scalar() local 258 vacc1 += vi1 * vw; in xnn_f32_spmm_minmax_ukernel_8x4__scalar() 328 const float vi1 = input[1]; in xnn_f32_spmm_minmax_ukernel_8x4__scalar() local 337 vacc1x0 += vi1 * vw0; in xnn_f32_spmm_minmax_ukernel_8x4__scalar() 341 vacc1x1 += vi1 * vw1; in xnn_f32_spmm_minmax_ukernel_8x4__scalar() [all …]
|
D | 4x1-minmax-scalar.c | 49 const float vi1 = input[1]; in xnn_f32_spmm_minmax_ukernel_4x1__scalar() local 55 vacc1x0 += vi1 * vw0; in xnn_f32_spmm_minmax_ukernel_4x1__scalar() 90 const float vi1 = input[1]; in xnn_f32_spmm_minmax_ukernel_4x1__scalar() local 96 vacc1 += vi1 * vw; in xnn_f32_spmm_minmax_ukernel_4x1__scalar() 136 const float vi1 = input[1]; in xnn_f32_spmm_minmax_ukernel_4x1__scalar() local 140 vacc1x0 += vi1 * vw0; in xnn_f32_spmm_minmax_ukernel_4x1__scalar() 161 const float vi1 = input[1]; in xnn_f32_spmm_minmax_ukernel_4x1__scalar() local 165 vacc1 += vi1 * vw; in xnn_f32_spmm_minmax_ukernel_4x1__scalar()
|
/external/XNNPACK/src/f32-maxpool/ |
D | 9p8x-minmax-wasmsimd-x86-c4.c | 80 const v128_t vi1 = wasm_v128_load(i1); in xnn_f32_maxpool_minmax_ukernel_9p8x__wasmsimd_x86_c4() local 97 const v128_t vmax01 = wasm_v128_bitselect(vi1, vi0, wasm_f32x4_lt(vi0, vi1)); in xnn_f32_maxpool_minmax_ukernel_9p8x__wasmsimd_x86_c4() 119 const v128_t vi1 = wasm_v128_load(i1); in xnn_f32_maxpool_minmax_ukernel_9p8x__wasmsimd_x86_c4() local 136 const v128_t vmax01 = wasm_v128_bitselect(vi1, vi0, wasm_f32x4_lt(vi0, vi1)); in xnn_f32_maxpool_minmax_ukernel_9p8x__wasmsimd_x86_c4() 207 const v128_t vi1 = wasm_v128_load(i1); in xnn_f32_maxpool_minmax_ukernel_9p8x__wasmsimd_x86_c4() local 223 const v128_t vmax01 = wasm_v128_bitselect(vi1, vi0, wasm_f32x4_lt(vi0, vi1)); in xnn_f32_maxpool_minmax_ukernel_9p8x__wasmsimd_x86_c4() 244 const v128_t vi1 = wasm_v128_load(i1); in xnn_f32_maxpool_minmax_ukernel_9p8x__wasmsimd_x86_c4() local 253 const v128_t vmax01 = wasm_v128_bitselect(vi1, vi0, wasm_f32x4_lt(vi0, vi1)); in xnn_f32_maxpool_minmax_ukernel_9p8x__wasmsimd_x86_c4()
|
D | 9p8x-minmax-sse-c4.c | 80 const __m128 vi1 = _mm_loadu_ps(i1); in xnn_f32_maxpool_minmax_ukernel_9p8x__sse_c4() local 97 const __m128 vmax018 = _mm_max_ps(_mm_max_ps(vi0, vi1), vi8); in xnn_f32_maxpool_minmax_ukernel_9p8x__sse_c4() 113 const __m128 vi1 = _mm_loadu_ps(i1); in xnn_f32_maxpool_minmax_ukernel_9p8x__sse_c4() local 130 const __m128 vmax018 = _mm_max_ps(_mm_max_ps(vi0, vi1), vi8); in xnn_f32_maxpool_minmax_ukernel_9p8x__sse_c4() 196 const __m128 vi1 = _mm_loadu_ps(i1); in xnn_f32_maxpool_minmax_ukernel_9p8x__sse_c4() local 212 const __m128 vmax01 = _mm_max_ps(_mm_max_ps(vi0, vi1), vo); in xnn_f32_maxpool_minmax_ukernel_9p8x__sse_c4() 227 const __m128 vi1 = _mm_loadu_ps(i1); in xnn_f32_maxpool_minmax_ukernel_9p8x__sse_c4() local 236 const __m128 vmax01 = _mm_max_ps(_mm_max_ps(vi0, vi1), vo); in xnn_f32_maxpool_minmax_ukernel_9p8x__sse_c4()
|
/external/XNNPACK/src/f32-gavgpool/ |
D | 7p7x-minmax-scalar-c1.c | 38 const float vi1 = *i1++; in xnn_f32_gavgpool_minmax_ukernel_7p7x__scalar_c1() local 45 const float vsum01 = vi0 + vi1; in xnn_f32_gavgpool_minmax_ukernel_7p7x__scalar_c1() 70 const float vi1 = *i1++; in xnn_f32_gavgpool_minmax_ukernel_7p7x__scalar_c1() local 78 const float vsum01 = vi0 + vi1; in xnn_f32_gavgpool_minmax_ukernel_7p7x__scalar_c1() 124 const float vi1 = *i1++; in xnn_f32_gavgpool_minmax_ukernel_7p7x__scalar_c1() local 132 const float vsum01 = vi0 + vi1; in xnn_f32_gavgpool_minmax_ukernel_7p7x__scalar_c1()
|
D | 7p7x-minmax-wasm-c1.c | 38 const float vi1 = *i1++; in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasm_c1() local 45 const float vsum01 = vi0 + vi1; in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasm_c1() 70 const float vi1 = *i1++; in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasm_c1() local 78 const float vsum01 = vi0 + vi1; in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasm_c1() 124 const float vi1 = *i1++; in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasm_c1() local 132 const float vsum01 = vi0 + vi1; in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasm_c1()
|
D | 7p7x-minmax-neon-c4.c | 40 const float32x4_t vi1 = vld1q_f32(i1); i1 += 4; in xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4() local 47 const float32x4_t vsum01 = vaddq_f32(vi0, vi1); in xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4() 71 const float32x4_t vi1 = vld1q_f32(i1); i1 += 4; in xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4() local 79 const float32x4_t vsum01 = vaddq_f32(vi0, vi1); in xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4() 125 const float32x4_t vi1 = vld1q_f32(i1); i1 += 4; in xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4() local 133 const float32x4_t vsum01 = vaddq_f32(vi0, vi1); in xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4() 153 const float32x4_t vi1 = vld1q_f32(i1); in xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4() local 161 const float32x4_t vsum01 = vaddq_f32(vi0, vi1); in xnn_f32_gavgpool_minmax_ukernel_7p7x__neon_c4()
|
D | 7p7x-minmax-wasmsimd-arm-c4.c | 41 const v128_t vi1 = wasm_v128_load(i1); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_arm_c4() local 54 const v128_t vsum01 = wasm_f32x4_add(vi0, vi1); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_arm_c4() 80 const v128_t vi1 = wasm_v128_load(i1); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_arm_c4() local 94 const v128_t vsum01 = wasm_f32x4_add(vi0, vi1); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_arm_c4() 141 const v128_t vi1 = wasm_v128_load(i1); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_arm_c4() local 156 const v128_t vsum01 = wasm_f32x4_add(vi0, vi1); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_arm_c4() 177 const v128_t vi1 = wasm_v128_load(i1); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_arm_c4() local 185 const v128_t vsum01 = wasm_f32x4_add(vi0, vi1); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_arm_c4()
|
D | 7p7x-minmax-sse-c4.c | 41 const __m128 vi1 = _mm_loadu_ps(i1); in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4() local 54 const __m128 vsum01 = _mm_add_ps(vi0, vi1); in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4() 79 const __m128 vi1 = _mm_loadu_ps(i1); in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4() local 93 const __m128 vsum01 = _mm_add_ps(vi0, vi1); in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4() 140 const __m128 vi1 = _mm_loadu_ps(i1); in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4() local 155 const __m128 vsum01 = _mm_add_ps(vi0, vi1); in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4() 176 const __m128 vi1 = _mm_loadu_ps(i1); in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4() local 184 const __m128 vsum01 = _mm_add_ps(vi0, vi1); in xnn_f32_gavgpool_minmax_ukernel_7p7x__sse_c4()
|
D | 7p7x-minmax-wasmsimd-x86-c4.c | 41 const v128_t vi1 = wasm_v128_load(i1); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_x86_c4() local 54 const v128_t vsum01 = wasm_f32x4_add(vi0, vi1); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_x86_c4() 80 const v128_t vi1 = wasm_v128_load(i1); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_x86_c4() local 94 const v128_t vsum01 = wasm_f32x4_add(vi0, vi1); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_x86_c4() 141 const v128_t vi1 = wasm_v128_load(i1); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_x86_c4() local 156 const v128_t vsum01 = wasm_f32x4_add(vi0, vi1); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_x86_c4() 177 const v128_t vi1 = wasm_v128_load(i1); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_x86_c4() local 185 const v128_t vsum01 = wasm_f32x4_add(vi0, vi1); in xnn_f32_gavgpool_minmax_ukernel_7p7x__wasmsimd_x86_c4()
|
/external/XNNPACK/src/f32-gavgpool-cw/ |
D | neon-x4.c | 43 const float32x4_t vi1 = vld1q_f32(i1); i1 += 4; in xnn_f32_gavgpool_cw_ukernel__neon_x4() local 48 vsum1 = vaddq_f32(vsum1, vi1); in xnn_f32_gavgpool_cw_ukernel__neon_x4() 56 float32x4_t vi1 = vld1q_f32(i1); i1 = (const float*) ((uintptr_t) i1 + n); in xnn_f32_gavgpool_cw_ukernel__neon_x4() local 61 vi1 = vreinterpretq_f32_u32(vandq_u32(vmask, vreinterpretq_u32_f32(vi1))); in xnn_f32_gavgpool_cw_ukernel__neon_x4() 66 vsum1 = vaddq_f32(vsum1, vi1); in xnn_f32_gavgpool_cw_ukernel__neon_x4()
|
/external/XNNPACK/src/f16-gavgpool/ |
D | 7p7x-minmax-neonfp16arith-c8.c | 41 const float16x8_t vi1 = vld1q_f16(i1); i1 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c8() local 48 const float16x8_t vsum01 = vaddq_f16(vi0, vi1); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c8() 72 const float16x8_t vi1 = vld1q_f16(i1); i1 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c8() local 80 const float16x8_t vsum01 = vaddq_f16(vi0, vi1); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c8() 126 const float16x8_t vi1 = vld1q_f16(i1); i1 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c8() local 134 const float16x8_t vsum01 = vaddq_f16(vi0, vi1); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c8() 154 const float16x8_t vi1 = vld1q_f16(i1); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c8() local 162 const float16x8_t vsum01 = vaddq_f16(vi0, vi1); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c8()
|