• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2019 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 
6 #include <assert.h>
7 
8 #include <xnnpack/maxpool.h>
9 #include <xnnpack/math.h>
10 
11 
xnn_f32_maxpool_ukernel_9p8x__wasm_c1(size_t output_pixels,size_t kernel_elements,size_t channels,const float ** input,size_t input_offset,float * output,size_t input_increment,size_t output_increment,const union xnn_f32_output_params params[restrict static1])12 void xnn_f32_maxpool_ukernel_9p8x__wasm_c1(
13     size_t output_pixels,
14     size_t kernel_elements,
15     size_t channels,
16     const float** input,
17     size_t input_offset,
18     float* output,
19     size_t input_increment,
20     size_t output_increment,
21     const union xnn_f32_output_params params[restrict static 1])
22 {
23   assert(output_pixels != 0);
24   assert(kernel_elements != 0);
25   assert(channels != 0);
26 
27   const float voutput_min = params->scalar.min;
28   const float voutput_max = params->scalar.max;
29   do {
30     float* o = output;
31     {
32       const float* i0 = *input++;
33       const float* i1 = *input++;
34       const float* i2 = *input++;
35       const float* i3 = *input++;
36       const float* i4 = *input++;
37       const float* i5 = *input++;
38       const float* i6 = *input++;
39       const float* i7 = *input++;
40       const float* i8 = *input++;
41       i0 = (const float*) ((uintptr_t) i0 + input_offset);
42       i1 = (const float*) ((uintptr_t) i1 + input_offset);
43       i2 = (const float*) ((uintptr_t) i2 + input_offset);
44       i3 = (const float*) ((uintptr_t) i3 + input_offset);
45       i4 = (const float*) ((uintptr_t) i4 + input_offset);
46       i5 = (const float*) ((uintptr_t) i5 + input_offset);
47       i6 = (const float*) ((uintptr_t) i6 + input_offset);
48       i7 = (const float*) ((uintptr_t) i7 + input_offset);
49       i8 = (const float*) ((uintptr_t) i8 + input_offset);
50       if (kernel_elements < 2) {
51         i1 = i0;
52       }
53       if (kernel_elements <= 2) {
54         i2 = i0;
55       }
56       if (kernel_elements < 4) {
57         i3 = i0;
58       }
59       if (kernel_elements <= 4) {
60         i4 = i0;
61       }
62       if (kernel_elements < 6) {
63         i5 = i0;
64       }
65       if (kernel_elements <= 6) {
66         i6 = i0;
67       }
68       if (kernel_elements < 8) {
69         i7 = i0;
70       }
71       if (kernel_elements <= 8) {
72         i8 = i0;
73       }
74 
75       size_t c = channels;
76       do {
77         const float vi0 = *i0++;
78         const float vi1 = *i1++;
79         const float vi2 = *i2++;
80         const float vi3 = *i3++;
81         const float vi4 = *i4++;
82         const float vi5 = *i5++;
83         const float vi6 = *i6++;
84         const float vi7 = *i7++;
85         const float vi8 = *i8++;
86 
87         const float vmax01 = __builtin_wasm_max_f32(vi0, vi1);
88         const float vmax23 = __builtin_wasm_max_f32(vi2, vi3);
89         const float vmax45 = __builtin_wasm_max_f32(vi4, vi5);
90         const float vmax67 = __builtin_wasm_max_f32(vi6, vi7);
91         const float vmax018 = __builtin_wasm_max_f32(vmax01, vi8);
92 
93         const float vmax2345 = __builtin_wasm_max_f32(vmax23, vmax45);
94         const float vmax01678 = __builtin_wasm_max_f32(vmax018, vmax67);
95         float vout = __builtin_wasm_max_f32(vmax2345, vmax01678);
96         vout = __builtin_wasm_max_f32(vout, voutput_min);
97         vout = __builtin_wasm_min_f32(vout, voutput_max);
98 
99         *o++ = vout;
100       } while (--c != 0);
101     }
102 
103     for (ptrdiff_t k = (ptrdiff_t) kernel_elements - 9; k > 0; k -= 8) {
104       const float* i0 = *input++;
105       const float* i1 = *input++;
106       const float* i2 = *input++;
107       const float* i3 = *input++;
108       const float* i4 = *input++;
109       const float* i5 = *input++;
110       const float* i6 = *input++;
111       const float* i7 = *input++;
112       i0 = (const float*) ((uintptr_t) i0 + input_offset);
113       i1 = (const float*) ((uintptr_t) i1 + input_offset);
114       i2 = (const float*) ((uintptr_t) i2 + input_offset);
115       i3 = (const float*) ((uintptr_t) i3 + input_offset);
116       i4 = (const float*) ((uintptr_t) i4 + input_offset);
117       i5 = (const float*) ((uintptr_t) i5 + input_offset);
118       i6 = (const float*) ((uintptr_t) i6 + input_offset);
119       i7 = (const float*) ((uintptr_t) i7 + input_offset);
120       if (k < 2) {
121         i1 = i0;
122       }
123       if (k <= 2) {
124         i2 = i0;
125       }
126       if (k < 4) {
127         i3 = i0;
128       }
129       if (k <= 4) {
130         i4 = i0;
131       }
132       if (k < 6) {
133         i5 = i0;
134       }
135       if (k <= 6) {
136         i6 = i0;
137       }
138       if (k < 8) {
139         i7 = i0;
140       }
141 
142       o = output;
143       size_t c = channels;
144       do {
145         const float vi0 = *i0++;
146         const float vi1 = *i1++;
147         const float vi2 = *i2++;
148         const float vi3 = *i3++;
149         const float vi4 = *i4++;
150         const float vi5 = *i5++;
151         const float vi6 = *i6++;
152         const float vi7 = *i7++;
153         const float vi8 = *o;
154 
155         const float vmax01 = __builtin_wasm_max_f32(vi0, vi1);
156         const float vmax23 = __builtin_wasm_max_f32(vi2, vi3);
157         const float vmax45 = __builtin_wasm_max_f32(vi4, vi5);
158         const float vmax67 = __builtin_wasm_max_f32(vi6, vi7);
159         const float vmax018 = __builtin_wasm_max_f32(vmax01, vi8);
160 
161         const float vmax2345 = __builtin_wasm_max_f32(vmax23, vmax45);
162         const float vmax01678 = __builtin_wasm_max_f32(vmax018, vmax67);
163         float vout = __builtin_wasm_max_f32(vmax2345, vmax01678);
164         vout = __builtin_wasm_max_f32(vout, voutput_min);
165         vout = __builtin_wasm_min_f32(vout, voutput_max);
166 
167         *o++ = vout;
168       } while (--c != 0);
169     }
170     input = (const float**) ((uintptr_t) input + input_increment);
171     output = (float*) ((uintptr_t) o + output_increment);
172   } while (--output_pixels != 0);
173 }
174