• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2019 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 
6 #include <assert.h>
7 
8 #include <xnnpack/gavgpool.h>
9 #include <xnnpack/math.h>
10 
11 
xnn_f32_gavgpool_ukernel_mp7p7q__scalar(size_t m,size_t n,const float * input,size_t input_stride,const float * zero,float * buffer,float * output,const union xnn_f32_avgpool_params params[restrict static1])12 void xnn_f32_gavgpool_ukernel_mp7p7q__scalar(
13     size_t m,
14     size_t n,
15     const float* input,
16     size_t input_stride,
17     const float* zero,
18     float* buffer,
19     float* output,
20     const union xnn_f32_avgpool_params params[restrict static 1])
21 {
22   assert(m > 7);
23   assert(n != 0);
24 
25   const float* i0 = input;
26   const float* i1 = (const float*) ((uintptr_t) i0 + input_stride);
27   const float* i2 = (const float*) ((uintptr_t) i1 + input_stride);
28   const float* i3 = (const float*) ((uintptr_t) i2 + input_stride);
29   const float* i4 = (const float*) ((uintptr_t) i3 + input_stride);
30   const float* i5 = (const float*) ((uintptr_t) i4 + input_stride);
31   const float* i6 = (const float*) ((uintptr_t) i5 + input_stride);
32   const size_t input_increment = 7 * input_stride - n * sizeof(float);
33 
34   float* b = buffer;
35   size_t k = n;
36   do {
37     const float vi0 = *i0++;
38     const float vi1 = *i1++;
39     const float vi2 = *i2++;
40     const float vi3 = *i3++;
41     const float vi4 = *i4++;
42     const float vi5 = *i5++;
43     const float vi6 = *i6++;
44 
45     const float vsum01 = vi0 + vi1;
46     const float vsum23 = vi2 + vi3;
47     const float vsum45 = vi4 + vi5;
48 
49     const float vsum016 = vsum01 + vi6;
50     const float vsum2345 = vsum23 + vsum45;
51 
52     const float vsum = vsum016 + vsum2345;
53 
54     *b++ = vsum;
55   } while (--k != 0);
56   for (m -= 7; m > 7; m -= 7) {
57     b = buffer;
58 
59     i0 = (const float*) ((uintptr_t) i0 + input_increment);
60     i1 = (const float*) ((uintptr_t) i1 + input_increment);
61     i2 = (const float*) ((uintptr_t) i2 + input_increment);
62     i3 = (const float*) ((uintptr_t) i3 + input_increment);
63     i4 = (const float*) ((uintptr_t) i4 + input_increment);
64     i5 = (const float*) ((uintptr_t) i5 + input_increment);
65     i6 = (const float*) ((uintptr_t) i6 + input_increment);
66 
67     size_t k = n;
68     do {
69       const float vi0 = *i0++;
70       const float vi1 = *i1++;
71       const float vi2 = *i2++;
72       const float vi3 = *i3++;
73       const float vi4 = *i4++;
74       const float vi5 = *i5++;
75       const float vi6 = *i6++;
76       const float vacc = *b;
77 
78       const float vsum01 = vi0 + vi1;
79       const float vsum23 = vi2 + vi3;
80       const float vsum45 = vi4 + vi5;
81       const float vsum6a = vi6 + vacc;
82 
83       const float vsum0123 = vsum01 + vsum23;
84       const float vsum456a = vsum45 + vsum6a;
85 
86       const float vsum = vsum0123 + vsum456a;
87 
88       *b++ = vsum;
89     } while (--k != 0);
90   }
91 
92   i0 = (const float*) ((uintptr_t) i0 + input_increment);
93   i1 = (const float*) ((uintptr_t) i1 + input_increment);
94   if (m < 2) {
95     i1 = zero;
96   }
97   i2 = (const float*) ((uintptr_t) i2 + input_increment);
98   if (m <= 2) {
99     i2 = zero;
100   }
101   i3 = (const float*) ((uintptr_t) i3 + input_increment);
102   if (m < 4) {
103     i3 = zero;
104   }
105   i4 = (const float*) ((uintptr_t) i4 + input_increment);
106   if (m <= 4) {
107     i4 = zero;
108   }
109   i5 = (const float*) ((uintptr_t) i5 + input_increment);
110   if (m < 6) {
111     i5 = zero;
112   }
113   i6 = (const float*) ((uintptr_t) i6 + input_increment);
114   if (m <= 6) {
115     i6 = zero;
116   }
117   const float vmultiplier = params->scalar.multiplier;
118   const float voutput_min = params->scalar.output_min;
119   const float voutput_max = params->scalar.output_max;
120 
121   b = buffer;
122   do {
123     const float vi0 = *i0++;
124     const float vi1 = *i1++;
125     const float vi2 = *i2++;
126     const float vi3 = *i3++;
127     const float vi4 = *i4++;
128     const float vi5 = *i5++;
129     const float vi6 = *i6++;
130     const float vacc = *b++;
131 
132     const float vsum01 = vi0 + vi1;
133     const float vsum23 = vi2 + vi3;
134     const float vsum45 = vi4 + vi5;
135     const float vsum6a = vi6 + vacc;
136 
137     const float vsum0123 = vsum01 + vsum23;
138     const float vsum456a = vsum45 + vsum6a;
139 
140     const float vsum = vsum0123 + vsum456a;
141 
142     float vout = vsum * vmultiplier;
143     vout = math_max_f32(vout, voutput_min);
144     vout = math_min_f32(vout, voutput_max);
145 
146     *output++ = vout;
147   } while (--n != 0);
148 }
149