1 // Auto-generated file. Do not edit!
2 // Template: src/f32-spmm/scalar.c.in
3 // Generator: tools/xngen
4 //
5 // Copyright 2019 Google LLC
6 //
7 // This source code is licensed under the BSD-style license found in the
8 // LICENSE file in the root directory of this source tree.
9
10 #include <assert.h>
11
12 #include <xnnpack/math.h>
13 #include <xnnpack/spmm.h>
14
15
xnn_f32_spmm_ukernel_8x1__scalar(uint32_t m,uint32_t n,const float * restrict a,const float * restrict weights,const int32_t * restrict widx_dmap,const uint32_t * restrict nidx_nnzmap,float * restrict c,const union xnn_f32_output_params params[restrict static1])16 void xnn_f32_spmm_ukernel_8x1__scalar(
17 uint32_t m,
18 uint32_t n,
19 const float*restrict a,
20 const float*restrict weights,
21 const int32_t*restrict widx_dmap,
22 const uint32_t*restrict nidx_nnzmap,
23 float*restrict c,
24 const union xnn_f32_output_params params[restrict static 1])
25 {
26 assert(m != 0);
27
28 const float vmin = params->scalar.min;
29 const float vmax = params->scalar.max;
30 size_t i = m;
31 while (i >= 8) {
32 const float*restrict w = weights;
33 const int32_t* dmap = widx_dmap;
34 const uint32_t* nnzmap = nidx_nnzmap;
35 size_t j = n;
36 while (j >= 1) {
37 uint32_t nnz = *nnzmap++;
38 float vacc0x0 = *w++;
39 float vacc1x0 = vacc0x0;
40 float vacc2x0 = vacc0x0;
41 float vacc3x0 = vacc0x0;
42 float vacc4x0 = vacc0x0;
43 float vacc5x0 = vacc0x0;
44 float vacc6x0 = vacc0x0;
45 float vacc7x0 = vacc0x0;
46 if XNN_LIKELY(nnz != 0) {
47 do {
48 const intptr_t diff = *dmap++;
49 const float va0 = a[0];
50 const float va1 = a[1];
51 const float va2 = a[2];
52 const float va3 = a[3];
53 const float va4 = a[4];
54 const float va5 = a[5];
55 const float va6 = a[6];
56 const float va7 = a[7];
57 a = (const float*restrict) ((uintptr_t) a + (uintptr_t) diff);
58 const float vb0 = *w++;
59 vacc0x0 += va0 * vb0;
60 vacc1x0 += va1 * vb0;
61 vacc2x0 += va2 * vb0;
62 vacc3x0 += va3 * vb0;
63 vacc4x0 += va4 * vb0;
64 vacc5x0 += va5 * vb0;
65 vacc6x0 += va6 * vb0;
66 vacc7x0 += va7 * vb0;
67 } while (--nnz != 0);
68 }
69 float vout0x0 = math_min_f32(vacc0x0, vmax);
70 float vout1x0 = math_min_f32(vacc1x0, vmax);
71 float vout2x0 = math_min_f32(vacc2x0, vmax);
72 float vout3x0 = math_min_f32(vacc3x0, vmax);
73 float vout4x0 = math_min_f32(vacc4x0, vmax);
74 float vout5x0 = math_min_f32(vacc5x0, vmax);
75 float vout6x0 = math_min_f32(vacc6x0, vmax);
76 float vout7x0 = math_min_f32(vacc7x0, vmax);
77 vout0x0 = math_max_f32(vout0x0, vmin);
78 vout1x0 = math_max_f32(vout1x0, vmin);
79 vout2x0 = math_max_f32(vout2x0, vmin);
80 vout3x0 = math_max_f32(vout3x0, vmin);
81 vout4x0 = math_max_f32(vout4x0, vmin);
82 vout5x0 = math_max_f32(vout5x0, vmin);
83 vout6x0 = math_max_f32(vout6x0, vmin);
84 vout7x0 = math_max_f32(vout7x0, vmin);
85 c[0 * m + 0] = vout0x0;
86 c[0 * m + 1] = vout1x0;
87 c[0 * m + 2] = vout2x0;
88 c[0 * m + 3] = vout3x0;
89 c[0 * m + 4] = vout4x0;
90 c[0 * m + 5] = vout5x0;
91 c[0 * m + 6] = vout6x0;
92 c[0 * m + 7] = vout7x0;
93 c += 1 * m;
94 j -= 1;
95 }
96 if XNN_UNLIKELY(j != 0) {
97 do {
98 uint32_t nnz = *nnzmap++;
99 float vacc0 = *w++;
100 float vacc1 = vacc0;
101 float vacc2 = vacc0;
102 float vacc3 = vacc0;
103 float vacc4 = vacc0;
104 float vacc5 = vacc0;
105 float vacc6 = vacc0;
106 float vacc7 = vacc0;
107 if XNN_LIKELY(nnz != 0) {
108 do {
109 const intptr_t diff = *dmap++;
110 const float va0 = a[0];
111 const float va1 = a[1];
112 const float va2 = a[2];
113 const float va3 = a[3];
114 const float va4 = a[4];
115 const float va5 = a[5];
116 const float va6 = a[6];
117 const float va7 = a[7];
118 a = (const float*restrict) ((uintptr_t) a + (uintptr_t) diff);
119 const float vb = *w++;
120 vacc0 += va0 * vb;
121 vacc1 += va1 * vb;
122 vacc2 += va2 * vb;
123 vacc3 += va3 * vb;
124 vacc4 += va4 * vb;
125 vacc5 += va5 * vb;
126 vacc6 += va6 * vb;
127 vacc7 += va7 * vb;
128 } while (--nnz != 0);
129 }
130 float vout0 = math_min_f32(vacc0, vmax);
131 float vout1 = math_min_f32(vacc1, vmax);
132 float vout2 = math_min_f32(vacc2, vmax);
133 float vout3 = math_min_f32(vacc3, vmax);
134 float vout4 = math_min_f32(vacc4, vmax);
135 float vout5 = math_min_f32(vacc5, vmax);
136 float vout6 = math_min_f32(vacc6, vmax);
137 float vout7 = math_min_f32(vacc7, vmax);
138 vout0 = math_max_f32(vout0, vmin);
139 vout1 = math_max_f32(vout1, vmin);
140 vout2 = math_max_f32(vout2, vmin);
141 vout3 = math_max_f32(vout3, vmin);
142 vout4 = math_max_f32(vout4, vmin);
143 vout5 = math_max_f32(vout5, vmin);
144 vout6 = math_max_f32(vout6, vmin);
145 vout7 = math_max_f32(vout7, vmin);
146 c[0] = vout0;
147 c[1] = vout1;
148 c[2] = vout2;
149 c[3] = vout3;
150 c[4] = vout4;
151 c[5] = vout5;
152 c[6] = vout6;
153 c[7] = vout7;
154 c += m;
155 j -= 1;
156 } while (j != 0);
157 }
158 c -= m * n;
159 c += 8;
160 a += 8;
161 i -= 8;
162 }
163 if XNN_UNLIKELY(i != 0) {
164 if (i & 4) {
165 const float*restrict w = weights;
166 const int32_t* dmap = widx_dmap;
167 const uint32_t* nnzmap = nidx_nnzmap;
168 size_t j = n;
169 while (j >= 1) {
170 uint32_t nnz = *nnzmap++;
171 float vacc0x0 = *w++;
172 float vacc1x0 = vacc0x0;
173 float vacc2x0 = vacc0x0;
174 float vacc3x0 = vacc0x0;
175 if XNN_LIKELY(nnz != 0) {
176 do {
177 const intptr_t diff = *dmap++;
178 const float va0 = a[0];
179 const float va1 = a[1];
180 const float va2 = a[2];
181 const float va3 = a[3];
182 a = (const float*restrict) ((uintptr_t) a + (uintptr_t) diff);
183 const float vb0 = *w++;
184 vacc0x0 += va0 * vb0;
185 vacc1x0 += va1 * vb0;
186 vacc2x0 += va2 * vb0;
187 vacc3x0 += va3 * vb0;
188 } while (--nnz != 0);
189 }
190 float vout0x0 = math_min_f32(vacc0x0, vmax);
191 float vout1x0 = math_min_f32(vacc1x0, vmax);
192 float vout2x0 = math_min_f32(vacc2x0, vmax);
193 float vout3x0 = math_min_f32(vacc3x0, vmax);
194 vout0x0 = math_max_f32(vout0x0, vmin);
195 vout1x0 = math_max_f32(vout1x0, vmin);
196 vout2x0 = math_max_f32(vout2x0, vmin);
197 vout3x0 = math_max_f32(vout3x0, vmin);
198 c[0 * m + 0] = vout0x0;
199 c[0 * m + 1] = vout1x0;
200 c[0 * m + 2] = vout2x0;
201 c[0 * m + 3] = vout3x0;
202 c += 1 * m;
203 j -= 1;
204 }
205 if XNN_UNLIKELY(j != 0) {
206 do {
207 uint32_t nnz = *nnzmap++;
208 float vacc0 = *w++;
209 float vacc1 = vacc0;
210 float vacc2 = vacc0;
211 float vacc3 = vacc0;
212 if XNN_LIKELY(nnz != 0) {
213 do {
214 const intptr_t diff = *dmap++;
215 const float va0 = a[0];
216 const float va1 = a[1];
217 const float va2 = a[2];
218 const float va3 = a[3];
219 a = (const float*restrict) ((uintptr_t) a + (uintptr_t) diff);
220 const float vb = *w++;
221 vacc0 += va0 * vb;
222 vacc1 += va1 * vb;
223 vacc2 += va2 * vb;
224 vacc3 += va3 * vb;
225 } while (--nnz != 0);
226 }
227 float vout0 = math_min_f32(vacc0, vmax);
228 float vout1 = math_min_f32(vacc1, vmax);
229 float vout2 = math_min_f32(vacc2, vmax);
230 float vout3 = math_min_f32(vacc3, vmax);
231 vout0 = math_max_f32(vout0, vmin);
232 vout1 = math_max_f32(vout1, vmin);
233 vout2 = math_max_f32(vout2, vmin);
234 vout3 = math_max_f32(vout3, vmin);
235 c[0] = vout0;
236 c[1] = vout1;
237 c[2] = vout2;
238 c[3] = vout3;
239 c += m;
240 j -= 1;
241 } while (j != 0);
242 }
243 c -= m * n;
244 c += 4;
245 a += 4;
246 }
247 if (i & 2) {
248 const float*restrict w = weights;
249 const int32_t* dmap = widx_dmap;
250 const uint32_t* nnzmap = nidx_nnzmap;
251 size_t j = n;
252 while (j >= 1) {
253 uint32_t nnz = *nnzmap++;
254 float vacc0x0 = *w++;
255 float vacc1x0 = vacc0x0;
256 if XNN_LIKELY(nnz != 0) {
257 do {
258 const intptr_t diff = *dmap++;
259 const float va0 = a[0];
260 const float va1 = a[1];
261 a = (const float*restrict) ((uintptr_t) a + (uintptr_t) diff);
262 const float vb0 = *w++;
263 vacc0x0 += va0 * vb0;
264 vacc1x0 += va1 * vb0;
265 } while (--nnz != 0);
266 }
267 float vout0x0 = math_min_f32(vacc0x0, vmax);
268 float vout1x0 = math_min_f32(vacc1x0, vmax);
269 vout0x0 = math_max_f32(vout0x0, vmin);
270 vout1x0 = math_max_f32(vout1x0, vmin);
271 c[0 * m + 0] = vout0x0;
272 c[0 * m + 1] = vout1x0;
273 c += 1 * m;
274 j -= 1;
275 }
276 if XNN_UNLIKELY(j != 0) {
277 do {
278 uint32_t nnz = *nnzmap++;
279 float vacc0 = *w++;
280 float vacc1 = vacc0;
281 if XNN_LIKELY(nnz != 0) {
282 do {
283 const intptr_t diff = *dmap++;
284 const float va0 = a[0];
285 const float va1 = a[1];
286 a = (const float*restrict) ((uintptr_t) a + (uintptr_t) diff);
287 const float vb = *w++;
288 vacc0 += va0 * vb;
289 vacc1 += va1 * vb;
290 } while (--nnz != 0);
291 }
292 float vout0 = math_min_f32(vacc0, vmax);
293 float vout1 = math_min_f32(vacc1, vmax);
294 vout0 = math_max_f32(vout0, vmin);
295 vout1 = math_max_f32(vout1, vmin);
296 c[0] = vout0;
297 c[1] = vout1;
298 c += m;
299 j -= 1;
300 } while (j != 0);
301 }
302 c -= m * n;
303 c += 2;
304 a += 2;
305 }
306 if (i & 1) {
307 const float*restrict w = weights;
308 const int32_t* dmap = widx_dmap;
309 const uint32_t* nnzmap = nidx_nnzmap;
310 size_t j = n;
311 while (j >= 1) {
312 uint32_t nnz = *nnzmap++;
313 float vacc0x0 = *w++;
314 if XNN_LIKELY(nnz != 0) {
315 do {
316 const intptr_t diff = *dmap++;
317 const float va0 = a[0];
318 a = (const float*restrict) ((uintptr_t) a + (uintptr_t) diff);
319 const float vb0 = *w++;
320 vacc0x0 += va0 * vb0;
321 } while (--nnz != 0);
322 }
323 float vout0x0 = math_min_f32(vacc0x0, vmax);
324 vout0x0 = math_max_f32(vout0x0, vmin);
325 c[0 * m + 0] = vout0x0;
326 c += 1 * m;
327 j -= 1;
328 }
329 if XNN_UNLIKELY(j != 0) {
330 do {
331 uint32_t nnz = *nnzmap++;
332 float vacc0 = *w++;
333 if XNN_LIKELY(nnz != 0) {
334 do {
335 const intptr_t diff = *dmap++;
336 const float va0 = a[0];
337 a = (const float*restrict) ((uintptr_t) a + (uintptr_t) diff);
338 const float vb = *w++;
339 vacc0 += va0 * vb;
340 } while (--nnz != 0);
341 }
342 float vout0 = math_min_f32(vacc0, vmax);
343 vout0 = math_max_f32(vout0, vmin);
344 c[0] = vout0;
345 c += m;
346 j -= 1;
347 } while (j != 0);
348 }
349 c -= m * n;
350 c += 1;
351 a += 1;
352 }
353 }
354 }
355