• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2019 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 
6 #include <assert.h>
7 
8 #include <xnnpack/maxpool.h>
9 
10 
// Scalar max-pooling kernel over unsigned 8-bit data, one channel per inner
// iteration, with the result clamped to the range given in `params->scalar`.
//
// For every output pixel the kernel:
//   1. reads nine row pointers from `input` (first pass), then eight more per
//      additional pass while elements beyond the first nine remain;
//   2. adds `input_offset` (a byte offset) to each row pointer;
//   3. replaces row pointers that lie beyond the remaining element count with
//      the first row pointer of that pass, so they cannot change the maximum;
//   4. writes, for each of `channels` bytes, the clamped maximum to `output`.
//
// Later passes fold the value already stored in the output into the maximum.
// After each pixel `input` is advanced by `input_increment` bytes and `output`
// is set to the end of the written channels plus `output_increment` bytes.
//
// `output_pixels`, `kernel_elements` and `channels` must all be non-zero.
void xnn_u8_maxpool_minmax_ukernel_9p8x__scalar_c1(
    size_t output_pixels,
    size_t kernel_elements,
    size_t channels,
    const uint8_t** input,
    size_t input_offset,
    uint8_t* output,
    size_t input_increment,
    size_t output_increment,
    const union xnn_u8_minmax_params params[restrict XNN_MIN_ELEMENTS(1)])
{
  assert(output_pixels != 0);
  assert(kernel_elements != 0);
  assert(channels != 0);

  // Number of row pointers consumed by the first pass and by each later pass.
  enum { FIRST_PASS_ROWS = 9, NEXT_PASS_ROWS = 8 };

  const uint8_t clamp_hi = params->scalar.max;
  const uint8_t clamp_lo = params->scalar.min;

  do {
    uint8_t* out_ptr = output;

    // First pass: nine row pointers are always read, whatever the value of
    // kernel_elements.
    {
      const uint8_t* rows[FIRST_PASS_ROWS];
      for (size_t r = 0; r < FIRST_PASS_ROWS; r++) {
        rows[r] = (const uint8_t*) ((uintptr_t) *input++ + input_offset);
      }
      // Rows past the pooling window alias row 0 and so do not affect the max.
      for (size_t r = 1; r < FIRST_PASS_ROWS; r++) {
        if (kernel_elements <= r) {
          rows[r] = rows[0];
        }
      }

      size_t remaining = channels;
      do {
        uint8_t best = *rows[0]++;
        for (size_t r = 1; r < FIRST_PASS_ROWS; r++) {
          const uint8_t candidate = *rows[r]++;
          if (candidate > best) {
            best = candidate;
          }
        }

        // Upper bound is applied first, then the lower bound.
        if (best > clamp_hi) {
          best = clamp_hi;
        }
        if (best < clamp_lo) {
          best = clamp_lo;
        }

        *out_ptr++ = best;
      } while (--remaining != 0);
    }

    // Later passes: eight row pointers each, merged with the value already
    // stored in the output by the preceding pass.
    for (ptrdiff_t left = (ptrdiff_t) kernel_elements - FIRST_PASS_ROWS; left > 0; left -= NEXT_PASS_ROWS) {
      const uint8_t* rows[NEXT_PASS_ROWS];
      for (ptrdiff_t r = 0; r < NEXT_PASS_ROWS; r++) {
        rows[r] = (const uint8_t*) ((uintptr_t) *input++ + input_offset);
      }
      for (ptrdiff_t r = 1; r < NEXT_PASS_ROWS; r++) {
        if (left <= r) {
          rows[r] = rows[0];
        }
      }

      // Rewind to the start of the current pixel's output.
      out_ptr = output;
      size_t remaining = channels;
      do {
        uint8_t best = *out_ptr;
        for (ptrdiff_t r = 0; r < NEXT_PASS_ROWS; r++) {
          const uint8_t candidate = *rows[r]++;
          if (candidate > best) {
            best = candidate;
          }
        }

        if (best > clamp_hi) {
          best = clamp_hi;
        }
        if (best < clamp_lo) {
          best = clamp_lo;
        }

        *out_ptr++ = best;
      } while (--remaining != 0);
    }

    // Both increments are byte counts applied to the raw addresses.
    input = (const uint8_t**) ((uintptr_t) input + input_increment);
    output = (uint8_t*) ((uintptr_t) out_ptr + output_increment);
  } while (--output_pixels != 0);
}
175