1 // Copyright 2019 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5
6 #include <assert.h>
7
8 #include <xnnpack/pavgpool.h>
9 #include <xnnpack/math.h>
10
11
xnn_f32_pavgpool_minmax_ukernel_9p8x__scalar_c1(size_t output_pixels,size_t kernel_elements,size_t channels,const float ** input,size_t input_offset,const float * zero,const float * multiplier,float * buffer,float * output,size_t input_increment,size_t output_increment,const union xnn_f32_minmax_params params[restrict XNN_MIN_ELEMENTS (1)])12 void xnn_f32_pavgpool_minmax_ukernel_9p8x__scalar_c1(
13 size_t output_pixels,
14 size_t kernel_elements,
15 size_t channels,
16 const float** input,
17 size_t input_offset,
18 const float* zero,
19 const float* multiplier,
20 float* buffer,
21 float* output,
22 size_t input_increment,
23 size_t output_increment,
24 const union xnn_f32_minmax_params params[restrict XNN_MIN_ELEMENTS(1)])
25 {
26 assert(output_pixels != 0);
27 assert(kernel_elements > 9);
28 assert(channels != 0);
29
30 const float voutput_min = params->scalar.min;
31 const float voutput_max = params->scalar.max;
32
33 do {
34 {
35 const float* i0 = *input++;
36 assert(i0 != NULL);
37 if XNN_UNPREDICTABLE(i0 != zero) {
38 i0 = (const float*) ((uintptr_t) i0 + input_offset);
39 }
40 const float* i1 = *input++;
41 assert(i1 != NULL);
42 if XNN_UNPREDICTABLE(i1 != zero) {
43 i1 = (const float*) ((uintptr_t) i1 + input_offset);
44 }
45 const float* i2 = *input++;
46 assert(i2 != NULL);
47 if XNN_UNPREDICTABLE(i2 != zero) {
48 i2 = (const float*) ((uintptr_t) i2 + input_offset);
49 }
50 const float* i3 = *input++;
51 assert(i3 != NULL);
52 if XNN_UNPREDICTABLE(i3 != zero) {
53 i3 = (const float*) ((uintptr_t) i3 + input_offset);
54 }
55 const float* i4 = *input++;
56 assert(i4 != NULL);
57 if XNN_UNPREDICTABLE(i4 != zero) {
58 i4 = (const float*) ((uintptr_t) i4 + input_offset);
59 }
60 const float* i5 = *input++;
61 assert(i5 != NULL);
62 if XNN_UNPREDICTABLE(i5 != zero) {
63 i5 = (const float*) ((uintptr_t) i5 + input_offset);
64 }
65 const float* i6 = *input++;
66 assert(i6 != NULL);
67 if XNN_UNPREDICTABLE(i6 != zero) {
68 i6 = (const float*) ((uintptr_t) i6 + input_offset);
69 }
70 const float* i7 = *input++;
71 assert(i7 != NULL);
72 if XNN_UNPREDICTABLE(i7 != zero) {
73 i7 = (const float*) ((uintptr_t) i7 + input_offset);
74 }
75 const float* i8 = *input++;
76 assert(i8 != NULL);
77 if XNN_UNPREDICTABLE(i8 != zero) {
78 i8 = (const float*) ((uintptr_t) i8 + input_offset);
79 }
80
81 float* b = buffer;
82 size_t c = channels;
83 do {
84 const float vi0 = *i0++;
85 const float vi1 = *i1++;
86 const float vi2 = *i2++;
87 const float vi3 = *i3++;
88 const float vi4 = *i4++;
89 const float vi5 = *i5++;
90 const float vi6 = *i6++;
91 const float vi7 = *i7++;
92 const float vi8 = *i8++;
93
94 const float vsum01 = vi0 + vi1;
95 const float vsum23 = vi2 + vi3;
96 const float vsum45 = vi4 + vi5;
97 const float vsum67 = vi6 + vi7;
98 const float vsum018 = vsum01 + vi8;
99 const float vsum2345 = vsum23 + vsum45;
100 const float vsum01678 = vsum018 + vsum67;
101 const float vsum = vsum2345 + vsum01678;
102
103 *b++ = vsum;
104 } while (--c != 0);
105 }
106
107 size_t k = kernel_elements;
108 for (k -= 9; k > 8; k -= 8) {
109 const float* i0 = *input++;
110 assert(i0 != NULL);
111 if XNN_UNPREDICTABLE(i0 != zero) {
112 i0 = (const float*) ((uintptr_t) i0 + input_offset);
113 }
114 const float* i1 = *input++;
115 assert(i1 != NULL);
116 if XNN_UNPREDICTABLE(i1 != zero) {
117 i1 = (const float*) ((uintptr_t) i1 + input_offset);
118 }
119 const float* i2 = *input++;
120 assert(i2 != NULL);
121 if XNN_UNPREDICTABLE(i2 != zero) {
122 i2 = (const float*) ((uintptr_t) i2 + input_offset);
123 }
124 const float* i3 = *input++;
125 assert(i3 != NULL);
126 if XNN_UNPREDICTABLE(i3 != zero) {
127 i3 = (const float*) ((uintptr_t) i3 + input_offset);
128 }
129 const float* i4 = *input++;
130 assert(i4 != NULL);
131 if XNN_UNPREDICTABLE(i4 != zero) {
132 i4 = (const float*) ((uintptr_t) i4 + input_offset);
133 }
134 const float* i5 = *input++;
135 assert(i5 != NULL);
136 if XNN_UNPREDICTABLE(i5 != zero) {
137 i5 = (const float*) ((uintptr_t) i5 + input_offset);
138 }
139 const float* i6 = *input++;
140 assert(i6 != NULL);
141 if XNN_UNPREDICTABLE(i6 != zero) {
142 i6 = (const float*) ((uintptr_t) i6 + input_offset);
143 }
144 const float* i7 = *input++;
145 assert(i7 != NULL);
146 if XNN_UNPREDICTABLE(i7 != zero) {
147 i7 = (const float*) ((uintptr_t) i7 + input_offset);
148 }
149
150 float* b = buffer;
151 size_t c = channels;
152 do {
153 const float vi0 = *i0++;
154 const float vi1 = *i1++;
155 const float vi2 = *i2++;
156 const float vi3 = *i3++;
157 const float vi4 = *i4++;
158 const float vi5 = *i5++;
159 const float vi6 = *i6++;
160 const float vi7 = *i7++;
161 const float vacc = *b;
162
163 const float vsum01 = vi0 + vi1;
164 const float vsum23 = vi2 + vi3;
165 const float vsum45 = vi4 + vi5;
166 const float vsum67 = vi6 + vi7;
167 const float vsum01a = vsum01 + vacc;
168 const float vsum2345 = vsum23 + vsum45;
169 const float vsum0167a = vsum01a + vsum67;
170 const float vsum = vsum2345 + vsum0167a;
171
172 *b++ = vsum;
173 } while (--c != 0);
174 }
175
176 {
177 const float* i0 = input[0];
178 assert(i0 != NULL);
179 const float* i1 = input[1];
180 const float* i2 = input[2];
181 const float* i3 = input[3];
182 const float* i4 = input[4];
183 const float* i5 = input[5];
184 const float* i6 = input[6];
185 const float* i7 = input[7];
186 input = (const float**) ((uintptr_t) input + input_increment);
187 if (k < 2) {
188 i1 = zero;
189 }
190 assert(i1 != NULL);
191 if (k <= 2) {
192 i2 = zero;
193 }
194 assert(i2 != NULL);
195 if (k < 4) {
196 i3 = zero;
197 }
198 assert(i3 != NULL);
199 if (k <= 4) {
200 i4 = zero;
201 }
202 assert(i4 != NULL);
203 if (k < 6) {
204 i5 = zero;
205 }
206 assert(i5 != NULL);
207 if (k <= 6) {
208 i6 = zero;
209 }
210 assert(i6 != NULL);
211 if (k < 8) {
212 i7 = zero;
213 }
214 assert(i7 != NULL);
215 if XNN_UNPREDICTABLE(i0 != zero) {
216 i0 = (const float*) ((uintptr_t) i0 + input_offset);
217 }
218 if XNN_UNPREDICTABLE(i1 != zero) {
219 i1 = (const float*) ((uintptr_t) i1 + input_offset);
220 }
221 if XNN_UNPREDICTABLE(i2 != zero) {
222 i2 = (const float*) ((uintptr_t) i2 + input_offset);
223 }
224 if XNN_UNPREDICTABLE(i3 != zero) {
225 i3 = (const float*) ((uintptr_t) i3 + input_offset);
226 }
227 if XNN_UNPREDICTABLE(i4 != zero) {
228 i4 = (const float*) ((uintptr_t) i4 + input_offset);
229 }
230 if XNN_UNPREDICTABLE(i5 != zero) {
231 i5 = (const float*) ((uintptr_t) i5 + input_offset);
232 }
233 if XNN_UNPREDICTABLE(i6 != zero) {
234 i6 = (const float*) ((uintptr_t) i6 + input_offset);
235 }
236 if XNN_UNPREDICTABLE(i7 != zero) {
237 i7 = (const float*) ((uintptr_t) i7 + input_offset);
238 }
239
240 const float vmultiplier = *multiplier++;
241
242 size_t c = channels;
243 float* b = buffer;
244 do {
245 const float vi0 = *i0++;
246 const float vi1 = *i1++;
247 const float vi2 = *i2++;
248 const float vi3 = *i3++;
249 const float vi4 = *i4++;
250 const float vi5 = *i5++;
251 const float vi6 = *i6++;
252 const float vi7 = *i7++;
253 const float vacc = *b++;
254
255 const float vsum01 = vi0 + vi1;
256 const float vsum23 = vi2 + vi3;
257 const float vsum45 = vi4 + vi5;
258 const float vsum67 = vi6 + vi7;
259 const float vsum01a = vsum01 + vacc;
260 const float vsum2345 = vsum23 + vsum45;
261 const float vsum0167a = vsum01a + vsum67;
262 const float vsum = vsum2345 + vsum0167a;
263
264 float vout = vsum * vmultiplier;
265 vout = math_max_f32(vout, voutput_min);
266 vout = math_min_f32(vout, voutput_max);
267
268 *output++ = vout;
269 } while (--c != 0);
270 }
271 output = (float*) ((uintptr_t) output + output_increment);
272 } while (--output_pixels != 0);
273 }
274