• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2019 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 
6 #include <assert.h>
7 
8 #include <xnnpack/gavgpool.h>
9 #include <xnnpack/math.h>
10 
11 
// Scalar multi-pass global-average-pooling micro-kernel with min/max clamping,
// processing one channel per inner-loop iteration.
//
// The input rows are consumed seven at a time. Per-channel partial sums are
// kept in `buffer` between passes; the last pass adds the remaining 1..7 rows,
// multiplies each sum by params->scalar.scale, clamps the result to
// [params->scalar.min, params->scalar.max] and stores it to `output`.
//
//   rows          number of input rows; must be greater than 7.
//   channels      number of float elements per row; must be non-zero.
//   input         pointer to the first input row.
//   input_stride  distance between consecutive rows, in bytes.
//   zero          row substituted for rows beyond the end of the input in the
//                 last pass; `channels` elements are read from it
//                 (presumably all zeros - confirm against the caller).
//   buffer        scratch accumulator holding `channels` floats.
//   output        destination for `channels` pooled values.
//   params        scale and clamping bounds (scalar variant of the union).
void xnn_f32_gavgpool_minmax_ukernel_7p7x__scalar_c1(
    size_t rows,
    size_t channels,
    const float* input,
    size_t input_stride,
    const float* zero,
    float* buffer,
    float* output,
    const union xnn_f32_scaleminmax_params params[restrict XNN_MIN_ELEMENTS(1)])
{
  assert(rows > 7);
  assert(channels != 0);

  // Byte distance from one group of seven rows to the next. The row pointers
  // below are never advanced inside the channel loops (elements are indexed
  // instead), so stepping by this amount lands on the same addresses as
  // advancing by `channels` elements and then adding the leftover stride.
  const size_t group_stride = 7 * input_stride;

  const float* r0 = input;
  const float* r1 = (const float*) ((uintptr_t) r0 + input_stride);
  const float* r2 = (const float*) ((uintptr_t) r1 + input_stride);
  const float* r3 = (const float*) ((uintptr_t) r2 + input_stride);
  const float* r4 = (const float*) ((uintptr_t) r3 + input_stride);
  const float* r5 = (const float*) ((uintptr_t) r4 + input_stride);
  const float* r6 = (const float*) ((uintptr_t) r5 + input_stride);

  // First pass: initialise the accumulator with the sum of rows 0..6.
  // Each intermediate is a named float so the rounding points and the shape
  // of the addition tree stay fixed.
  size_t k = 0;
  do {
    const float x0 = r0[k];
    const float x1 = r1[k];
    const float x2 = r2[k];
    const float x3 = r3[k];
    const float x4 = r4[k];
    const float x5 = r5[k];
    const float x6 = r6[k];

    const float s01 = x0 + x1;
    const float s23 = x2 + x3;
    const float s45 = x4 + x5;

    const float s016 = s01 + x6;
    const float s2345 = s23 + s45;

    const float total = s016 + s2345;

    buffer[k] = total;
  } while (++k != channels);

  // Intermediate passes: while more than seven rows remain, fold the next
  // seven rows into the accumulator.
  rows -= 7;
  while (rows > 7) {
    r0 = (const float*) ((uintptr_t) r0 + group_stride);
    r1 = (const float*) ((uintptr_t) r1 + group_stride);
    r2 = (const float*) ((uintptr_t) r2 + group_stride);
    r3 = (const float*) ((uintptr_t) r3 + group_stride);
    r4 = (const float*) ((uintptr_t) r4 + group_stride);
    r5 = (const float*) ((uintptr_t) r5 + group_stride);
    r6 = (const float*) ((uintptr_t) r6 + group_stride);

    k = 0;
    do {
      const float x0 = r0[k];
      const float x1 = r1[k];
      const float x2 = r2[k];
      const float x3 = r3[k];
      const float x4 = r4[k];
      const float x5 = r5[k];
      const float x6 = r6[k];
      const float acc = buffer[k];

      const float s01 = x0 + x1;
      const float s23 = x2 + x3;
      const float s45 = x4 + x5;
      const float s6a = x6 + acc;

      const float s0123 = s01 + s23;
      const float s456a = s45 + s6a;

      const float total = s0123 + s456a;

      buffer[k] = total;
    } while (++k != channels);

    rows -= 7;
  }

  // Last pass: between 1 and 7 rows remain. Row slots past the end of the
  // input read from `zero` instead of from the input.
  r0 = (const float*) ((uintptr_t) r0 + group_stride);
  r1 = rows > 1 ? (const float*) ((uintptr_t) r1 + group_stride) : zero;
  r2 = rows > 2 ? (const float*) ((uintptr_t) r2 + group_stride) : zero;
  r3 = rows > 3 ? (const float*) ((uintptr_t) r3 + group_stride) : zero;
  r4 = rows > 4 ? (const float*) ((uintptr_t) r4 + group_stride) : zero;
  r5 = rows > 5 ? (const float*) ((uintptr_t) r5 + group_stride) : zero;
  r6 = rows > 6 ? (const float*) ((uintptr_t) r6 + group_stride) : zero;

  const float scale = params->scalar.scale;
  const float lower = params->scalar.min;
  const float upper = params->scalar.max;

  k = 0;
  do {
    const float x0 = r0[k];
    const float x1 = r1[k];
    const float x2 = r2[k];
    const float x3 = r3[k];
    const float x4 = r4[k];
    const float x5 = r5[k];
    const float x6 = r6[k];
    const float acc = buffer[k];

    const float s01 = x0 + x1;
    const float s23 = x2 + x3;
    const float s45 = x4 + x5;
    const float s6a = x6 + acc;

    const float s0123 = s01 + s23;
    const float s456a = s45 + s6a;

    const float total = s0123 + s456a;

    // Scale the sum, then clamp: lower bound first, upper bound second.
    float result = total * scale;
    result = math_max_f32(result, lower);
    result = math_min_f32(result, upper);

    output[k] = result;
  } while (++k != channels);
}
149