• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
8 
9 #include <assert.h>
10 
11 #include <emmintrin.h>
12 
13 #include <xnnpack/rmax.h>
14 
15 
// Reduces n unsigned 8-bit elements starting at x to their maximum and stores
// that single byte in *y.
//
//   n - number of input elements (bytes); must be non-zero (asserted).
//   x - pointer to the n input bytes; read with unaligned loads, so no
//       particular alignment is required.
//   y - pointer to the one output byte that receives the maximum.
//
// Inputs of 16 or more elements take the SSE2 path; shorter inputs are
// reduced one byte at a time.
void xnn_u8_rmax_ukernel__sse2(
    size_t n,
    const uint8_t* x,
    uint8_t* y)
{
  assert(n != 0);

  if XNN_LIKELY(n >= 16) {
    // Zero is the identity element for an unsigned maximum.
    __m128i vmax = _mm_setzero_si128();
    do {
      const __m128i vx = _mm_loadu_si128((const __m128i*) x);
      x += 16;
      vmax = _mm_max_epu8(vmax, vx);
      n -= 16;
    } while (n >= 16);
    if (n != 0) {
      // 1..15 elements remain. Rather than looping over them, move x back so
      // that one more 16-byte load ends exactly at the last input element.
      // n - 16 wraps around as an unsigned value; adding it through uintptr_t
      // wraps again and nets out to a backward step of 16 - n bytes. The load
      // re-reads bytes that were already reduced, which cannot change a max.
      const size_t x_increment = n - 16;
      x = (const uint8_t*) ((uintptr_t) x + x_increment);
      const __m128i vx = _mm_loadu_si128((const __m128i*) x);
      vmax = _mm_max_epu8(vmax, vx);
    }
    // Horizontal reduction: fold the 16 lanes in halves (8, 4, 2, 1 bytes)
    // until the overall maximum sits in the lowest byte of the register.
    vmax = _mm_max_epu8(vmax, _mm_unpackhi_epi64(vmax, vmax));
    vmax = _mm_max_epu8(vmax, _mm_srli_epi64(vmax, 32));
    vmax = _mm_max_epu8(vmax, _mm_srli_epi32(vmax, 16));
    vmax = _mm_max_epu8(vmax, _mm_srli_epi16(vmax, 8));
    // The low 32 bits are extracted; the cast keeps only the lowest byte.
    *y = (uint8_t) _mm_cvtsi128_si32(vmax);
  } else {
    // Fewer than 16 elements: a full vector load would read past the input,
    // so reduce byte by byte.
    uint8_t vmax = 0;
    do {
      const uint8_t vx = *x++;
      vmax = vx > vmax ? vx : vmax;
    } while (--n != 0);
    *y = vmax;
  }
}
51