Searched refs:vmax_lo (Results 1 – 7 of 7) sorted by relevance
/external/XNNPACK/src/f32-rmax/ |
D | avx.c | 43 __m128 vmax_lo = _mm_max_ps(_mm256_castps256_ps128(vmax), _mm256_extractf128_ps(vmax, 1)); in xnn_f32_rmax_ukernel__avx() local 44 vmax_lo = _mm_max_ps(vmax_lo, _mm_movehl_ps(vmax_lo, vmax_lo)); in xnn_f32_rmax_ukernel__avx() 45 vmax_lo = _mm_max_ss(vmax_lo, _mm_shuffle_ps(vmax_lo, vmax_lo, _MM_SHUFFLE(3, 3, 1, 1))); in xnn_f32_rmax_ukernel__avx() 48 vmax_lo = _mm_max_ss(vmax_lo, _mm_load_ss(x)); in xnn_f32_rmax_ukernel__avx() 53 _mm_store_ss(y, vmax_lo); in xnn_f32_rmax_ukernel__avx()
|
D | sse.c | 44 __m128 vmax_lo = _mm_max_ps(vmax, _mm_movehl_ps(vmax, vmax)); in xnn_f32_rmax_ukernel__sse() local 45 vmax_lo = _mm_max_ss(vmax_lo, _mm_shuffle_ps(vmax_lo, vmax_lo, _MM_SHUFFLE(3, 3, 1, 1))); in xnn_f32_rmax_ukernel__sse() 48 vmax_lo = _mm_max_ss(vmax_lo, _mm_load_ss(x)); in xnn_f32_rmax_ukernel__sse() 53 _mm_store_ss(y, vmax_lo); in xnn_f32_rmax_ukernel__sse()
|
D | neon.c | 43 float32x2_t vmax_lo = vget_low_f32(vpmaxq_f32(vmax, vmax)); in xnn_f32_rmax_ukernel__neon() local 45 float32x2_t vmax_lo = vmax_f32(vget_low_f32(vmax), vget_high_f32(vmax)); in xnn_f32_rmax_ukernel__neon() local 50 vmax_lo = vmax_f32(vmax_lo, vx); in xnn_f32_rmax_ukernel__neon() 55 *y = vmaxv_f32(vmax_lo); in xnn_f32_rmax_ukernel__neon() 57 vst1_lane_f32(y, vpmax_f32(vmax_lo, vmax_lo), 0); in xnn_f32_rmax_ukernel__neon()
|
D | avx512f.c | 44 …__m256 vmax_lo = _mm256_max_ps(_mm512_castps512_ps256(vmax), _mm512_castps512_ps256(_mm512_shuffle… in xnn_f32_rmax_ukernel__avx512f() local 45 __m128 vmax_ll = _mm_max_ps(_mm256_castps256_ps128(vmax_lo), _mm256_extractf128_ps(vmax_lo, 1)); in xnn_f32_rmax_ukernel__avx512f()
|
/external/XNNPACK/src/f32-argmaxpool/ |
D | 4x-neon-c4.c | 94 float32x2_t vmax_lo = vget_low_f32(vmax); in xnn_f32_argmaxpool_ukernel_4x__neon_c4() local 97 vst1_f32(output, vmax_lo); output += 2; in xnn_f32_argmaxpool_ukernel_4x__neon_c4() 99 vmax_lo = vget_high_f32(vmax); in xnn_f32_argmaxpool_ukernel_4x__neon_c4() 103 vst1_lane_f32(output, vmax_lo, 0); output += 1; in xnn_f32_argmaxpool_ukernel_4x__neon_c4()
|
D | 9x-neon-c4.c | 169 float32x2_t vmax_lo = vget_low_f32(vmax); in xnn_f32_argmaxpool_ukernel_9x__neon_c4() local 172 vst1_f32(output, vmax_lo); output += 2; in xnn_f32_argmaxpool_ukernel_9x__neon_c4() 174 vmax_lo = vget_high_f32(vmax); in xnn_f32_argmaxpool_ukernel_9x__neon_c4() 178 vst1_lane_f32(output, vmax_lo, 0); output += 1; in xnn_f32_argmaxpool_ukernel_9x__neon_c4()
|
D | 9p8x-neon-c4.c | 341 float32x2_t vmax_lo = vget_low_f32(vmax); in xnn_f32_argmaxpool_ukernel_9p8x__neon_c4() local 344 vst1_f32(o, vmax_lo); o += 2; in xnn_f32_argmaxpool_ukernel_9p8x__neon_c4() 346 vmax_lo = vget_high_f32(vmax); in xnn_f32_argmaxpool_ukernel_9p8x__neon_c4() 350 vst1_lane_f32(o, vmax_lo, 0); o += 1; in xnn_f32_argmaxpool_ukernel_9p8x__neon_c4()
|