// Auto-generated file. Do not edit!
//   Template: src/f32-dwconv/up-scalar.c.in
//   Generator: tools/xngen
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
9 
10 #include <assert.h>
11 
12 #include <xnnpack/dwconv.h>
13 #include <xnnpack/math.h>
14 
15 
xnn_f32_dwconv_ukernel_up2x9__scalar(size_t channels,size_t output_width,const float ** input,const float * weights,float * output,size_t input_stride,size_t output_increment,const union xnn_f32_output_params params[restrict static1])16 void xnn_f32_dwconv_ukernel_up2x9__scalar(
17     size_t channels,
18     size_t output_width,
19     const float** input,
20     const float* weights,
21     float* output,
22     size_t input_stride,
23     size_t output_increment,
24     const union xnn_f32_output_params params[restrict static 1])
25 {
26   assert(channels != 0);
27   assert(output_width != 0);
28 
29   const float vmin = params->scalar.min;
30   const float vmax = params->scalar.max;
31   do {
32     const float* i0 = input[0];
33     assert(i0 != NULL);
34     const float* i1 = input[1];
35     assert(i1 != NULL);
36     const float* i2 = input[2];
37     assert(i2 != NULL);
38     const float* i3 = input[3];
39     assert(i3 != NULL);
40     const float* i4 = input[4];
41     assert(i4 != NULL);
42     const float* i5 = input[5];
43     assert(i5 != NULL);
44     const float* i6 = input[6];
45     assert(i6 != NULL);
46     const float* i7 = input[7];
47     assert(i7 != NULL);
48     const float* i8 = input[8];
49     assert(i8 != NULL);
50     input = (const float**) ((uintptr_t) input + input_stride);
51 
52     size_t c = channels;
53     const float* w = weights;
54     for (; c >= 2; c -= 2) {
55       float vacc0p0 = w[0];
56       float vacc1p0 = w[1];
57 
58 
59       const float vi0x0 = i0[0];
60       const float vi0x1 = i0[1];
61       i0 += 2;
62 
63       const float vk0x0 = w[2];
64       vacc0p0 += vi0x0 * vk0x0;
65       const float vk0x1 = w[3];
66       vacc1p0 += vi0x1 * vk0x1;
67 
68       const float vi1x0 = i1[0];
69       const float vi1x1 = i1[1];
70       i1 += 2;
71 
72       const float vk1x0 = w[4];
73       vacc0p0 += vi1x0 * vk1x0;
74       const float vk1x1 = w[5];
75       vacc1p0 += vi1x1 * vk1x1;
76 
77       const float vi2x0 = i2[0];
78       const float vi2x1 = i2[1];
79       i2 += 2;
80 
81       const float vk2x0 = w[6];
82       vacc0p0 += vi2x0 * vk2x0;
83       const float vk2x1 = w[7];
84       vacc1p0 += vi2x1 * vk2x1;
85 
86       const float vi3x0 = i3[0];
87       const float vi3x1 = i3[1];
88       i3 += 2;
89 
90       const float vk3x0 = w[8];
91       vacc0p0 += vi3x0 * vk3x0;
92       const float vk3x1 = w[9];
93       vacc1p0 += vi3x1 * vk3x1;
94 
95       const float vi4x0 = i4[0];
96       const float vi4x1 = i4[1];
97       i4 += 2;
98 
99       const float vk4x0 = w[10];
100       vacc0p0 += vi4x0 * vk4x0;
101       const float vk4x1 = w[11];
102       vacc1p0 += vi4x1 * vk4x1;
103 
104       const float vi5x0 = i5[0];
105       const float vi5x1 = i5[1];
106       i5 += 2;
107 
108       const float vk5x0 = w[12];
109       vacc0p0 += vi5x0 * vk5x0;
110       const float vk5x1 = w[13];
111       vacc1p0 += vi5x1 * vk5x1;
112 
113       const float vi6x0 = i6[0];
114       const float vi6x1 = i6[1];
115       i6 += 2;
116 
117       const float vk6x0 = w[14];
118       vacc0p0 += vi6x0 * vk6x0;
119       const float vk6x1 = w[15];
120       vacc1p0 += vi6x1 * vk6x1;
121 
122       const float vi7x0 = i7[0];
123       const float vi7x1 = i7[1];
124       i7 += 2;
125 
126       const float vk7x0 = w[16];
127       vacc0p0 += vi7x0 * vk7x0;
128       const float vk7x1 = w[17];
129       vacc1p0 += vi7x1 * vk7x1;
130 
131       const float vi8x0 = i8[0];
132       const float vi8x1 = i8[1];
133       i8 += 2;
134 
135       const float vk8x0 = w[18];
136       vacc0p0 += vi8x0 * vk8x0;
137       const float vk8x1 = w[19];
138       vacc1p0 += vi8x1 * vk8x1;
139 
140       w += 20;
141 
142 
143       float vacc0 = math_max_f32(vacc0p0, vmin);
144       float vacc1 = math_max_f32(vacc1p0, vmin);
145 
146       vacc0 = math_min_f32(vacc0, vmax);
147       vacc1 = math_min_f32(vacc1, vmax);
148 
149       output[0] = vacc0;
150       output[1] = vacc1;
151       output += 2;
152     }
153     for (; c >= 1; c -= 1) {
154       float vacc0p0 = *w++;
155 
156       const float vi0 = *i0++;
157       const float vk0 = w[1];
158       vacc0p0 += vi0 * vk0;
159       const float vi1 = *i1++;
160       const float vk1 = w[3];
161       vacc0p0 += vi1 * vk1;
162       const float vi2 = *i2++;
163       const float vk2 = w[5];
164       vacc0p0 += vi2 * vk2;
165       const float vi3 = *i3++;
166       const float vk3 = w[7];
167       vacc0p0 += vi3 * vk3;
168       const float vi4 = *i4++;
169       const float vk4 = w[9];
170       vacc0p0 += vi4 * vk4;
171       const float vi5 = *i5++;
172       const float vk5 = w[11];
173       vacc0p0 += vi5 * vk5;
174       const float vi6 = *i6++;
175       const float vk6 = w[13];
176       vacc0p0 += vi6 * vk6;
177       const float vi7 = *i7++;
178       const float vk7 = w[15];
179       vacc0p0 += vi7 * vk7;
180       const float vi8 = *i8++;
181       const float vk8 = w[17];
182       vacc0p0 += vi8 * vk8;
183 
184 
185       float vacc0 = math_max_f32(vacc0p0, vmin);
186       vacc0 = math_min_f32(vacc0, vmax);
187       *output++ = vacc0;
188     }
189 
190     output = (float*) ((uintptr_t) output + output_increment);
191   } while (--output_width != 0);
192 }
193