1 // Copyright 2019 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5
6 #include <assert.h>
7
8 #include <xnnpack/pavgpool.h>
9 #include <xnnpack/math.h>
10
11
xnn_f32_pavgpool_ukernel_mp9p8q__scalar(size_t n,size_t ks,size_t kc,const float ** input,const float * zero,const float * multiplier,float * buffer,float * output,size_t input_increment,size_t output_increment,const union xnn_f32_output_params params[restrict static1])12 void xnn_f32_pavgpool_ukernel_mp9p8q__scalar(
13 size_t n,
14 size_t ks,
15 size_t kc,
16 const float** input,
17 const float* zero,
18 const float* multiplier,
19 float* buffer,
20 float* output,
21 size_t input_increment,
22 size_t output_increment,
23 const union xnn_f32_output_params params[restrict static 1])
24 {
25 assert(n != 0);
26 assert(ks > 9);
27 assert(kc != 0);
28
29 const float voutput_min = params->scalar.min;
30 const float voutput_max = params->scalar.max;
31
32 do {
33 {
34 const float* i0 = *input++;
35 const float* i1 = *input++;
36 const float* i2 = *input++;
37 const float* i3 = *input++;
38 const float* i4 = *input++;
39 const float* i5 = *input++;
40 const float* i6 = *input++;
41 const float* i7 = *input++;
42 const float* i8 = *input++;
43
44 float* b = buffer;
45 size_t k = kc;
46 do {
47 const float vi0 = *i0++;
48 const float vi1 = *i1++;
49 const float vi2 = *i2++;
50 const float vi3 = *i3++;
51 const float vi4 = *i4++;
52 const float vi5 = *i5++;
53 const float vi6 = *i6++;
54 const float vi7 = *i7++;
55 const float vi8 = *i8++;
56
57 const float vsum01 = vi0 + vi1;
58 const float vsum23 = vi2 + vi3;
59 const float vsum45 = vi4 + vi5;
60 const float vsum67 = vi6 + vi7;
61 const float vsum018 = vsum01 + vi8;
62 const float vsum2345 = vsum23 + vsum45;
63 const float vsum01678 = vsum018 + vsum67;
64 const float vsum = vsum2345 + vsum01678;
65
66 *b++ = vsum;
67 } while (--k != 0);
68 }
69
70 size_t m = ks;
71 for (m -= 9; m > 8; m -= 8) {
72 const float* i0 = *input++;
73 const float* i1 = *input++;
74 const float* i2 = *input++;
75 const float* i3 = *input++;
76 const float* i4 = *input++;
77 const float* i5 = *input++;
78 const float* i6 = *input++;
79 const float* i7 = *input++;
80
81 float* b = buffer;
82 size_t k = kc;
83 do {
84 const float vi0 = *i0++;
85 const float vi1 = *i1++;
86 const float vi2 = *i2++;
87 const float vi3 = *i3++;
88 const float vi4 = *i4++;
89 const float vi5 = *i5++;
90 const float vi6 = *i6++;
91 const float vi7 = *i7++;
92 const float vacc = *b;
93
94 const float vsum01 = vi0 + vi1;
95 const float vsum23 = vi2 + vi3;
96 const float vsum45 = vi4 + vi5;
97 const float vsum67 = vi6 + vi7;
98 const float vsum01a = vsum01 + vacc;
99 const float vsum2345 = vsum23 + vsum45;
100 const float vsum0167a = vsum01a + vsum67;
101 const float vsum = vsum2345 + vsum0167a;
102
103 *b++ = vsum;
104 } while (--k != 0);
105 }
106
107 {
108 const float* i0 = input[0];
109 const float* i1 = input[1];
110 const float* i2 = input[2];
111 const float* i3 = input[3];
112 const float* i4 = input[4];
113 const float* i5 = input[5];
114 const float* i6 = input[6];
115 const float* i7 = input[7];
116 input = (const float**) ((uintptr_t) input + input_increment);
117 if (m < 2) {
118 i1 = zero;
119 }
120 if (m <= 2) {
121 i2 = zero;
122 }
123 if (m < 4) {
124 i3 = zero;
125 }
126 if (m <= 4) {
127 i4 = zero;
128 }
129 if (m < 6) {
130 i5 = zero;
131 }
132 if (m <= 6) {
133 i6 = zero;
134 }
135 if (m != 8) {
136 i7 = zero;
137 }
138
139 const float vmultiplier = *multiplier++;
140
141 size_t k = kc;
142 float* b = buffer;
143 do {
144 const float vi0 = *i0++;
145 const float vi1 = *i1++;
146 const float vi2 = *i2++;
147 const float vi3 = *i3++;
148 const float vi4 = *i4++;
149 const float vi5 = *i5++;
150 const float vi6 = *i6++;
151 const float vi7 = *i7++;
152 const float vacc = *b++;
153
154 const float vsum01 = vi0 + vi1;
155 const float vsum23 = vi2 + vi3;
156 const float vsum45 = vi4 + vi5;
157 const float vsum67 = vi6 + vi7;
158 const float vsum01a = vsum01 + vacc;
159 const float vsum2345 = vsum23 + vsum45;
160 const float vsum0167a = vsum01a + vsum67;
161 const float vsum = vsum2345 + vsum0167a;
162
163 float vout = vsum * vmultiplier;
164 vout = math_max_f32(vout, voutput_min);
165 vout = math_min_f32(vout, voutput_max);
166
167 *output++ = vout;
168 } while (--k != 0);
169 }
170 output = (float*) ((uintptr_t) output + output_increment);
171 } while (--n != 0);
172 }
173