1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8
9 #include <assert.h>
10
11 #include <emmintrin.h>
12
13 #include <xnnpack/rmax.h>
14
15
// Reduces n unsigned bytes starting at x to their maximum and writes it to *y.
//
//   n - number of input bytes; must be non-zero (asserted).
//   x - input bytes; read with unaligned loads, never written.
//   y - receives the single-byte result.
//
// Inputs of 16 bytes or more use SSE2 byte-wise max; shorter inputs use a
// plain scalar loop.
void xnn_u8_rmax_ukernel__sse2(
    size_t n,
    const uint8_t* x,
    uint8_t* y)
{
  assert(n != 0);

  if XNN_LIKELY(n >= 16) {
    // Sixteen running maxima, one per byte lane. Zero is the identity for
    // unsigned max.
    __m128i vacc = _mm_setzero_si128();

    // Consume every complete 16-byte block.
    for (; n >= 16; n -= 16) {
      const __m128i vblock = _mm_loadu_si128((const __m128i*) x);
      vacc = _mm_max_epu8(vacc, vblock);
      x += 16;
    }

    if (n != 0) {
      // 1..15 bytes are left. Step back so that one last 16-byte load ends
      // exactly at the end of the input; the bytes it re-reads were already
      // folded into vacc, which is harmless for a max reduction.
      const size_t overlap = 16 - n;
      const uint8_t* tail = (const uint8_t*) ((uintptr_t) x - overlap);
      const __m128i vtail = _mm_loadu_si128((const __m128i*) tail);
      vacc = _mm_max_epu8(vacc, vtail);
    }

    // Horizontal reduction: halve the number of live lanes at each step.
    vacc = _mm_max_epu8(vacc, _mm_unpackhi_epi64(vacc, vacc));  // 16 -> 8 bytes
    vacc = _mm_max_epu8(vacc, _mm_srli_epi64(vacc, 32));        //  8 -> 4 bytes
    vacc = _mm_max_epu8(vacc, _mm_srli_epi32(vacc, 16));        //  4 -> 2 bytes
    vacc = _mm_max_epu8(vacc, _mm_srli_epi16(vacc, 8));         //  2 -> 1 byte

    // The overall maximum now sits in the lowest byte of the register.
    *y = (uint8_t) _mm_cvtsi128_si32(vacc);
  } else {
    // Fewer than 16 bytes: a full vector load would read outside the input.
    uint8_t best = 0;
    do {
      const uint8_t candidate = *x++;
      if (candidate > best) {
        best = candidate;
      }
    } while (--n != 0);
    *y = best;
  }
}
51