• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Auto-generated file. Do not edit!
2 //   Template: src/f32-spmm/scalar.c.in
3 //   Generator: tools/xngen
4 //
5 // Copyright 2019 Google LLC
6 //
7 // This source code is licensed under the BSD-style license found in the
8 // LICENSE file in the root directory of this source tree.
9 
10 #include <assert.h>
11 
12 #include <xnnpack/math.h>
13 #include <xnnpack/spmm.h>
14 
15 
xnn_f32_spmm_ukernel_8x1__scalar(uint32_t m,uint32_t n,const float * restrict a,const float * restrict weights,const int32_t * restrict widx_dmap,const uint32_t * restrict nidx_nnzmap,float * restrict c,const union xnn_f32_output_params params[restrict static1])16 void xnn_f32_spmm_ukernel_8x1__scalar(
17     uint32_t m,
18     uint32_t n,
19     const float*restrict a,
20     const float*restrict weights,
21     const int32_t*restrict widx_dmap,
22     const uint32_t*restrict nidx_nnzmap,
23     float*restrict c,
24     const union xnn_f32_output_params params[restrict static 1])
25 {
26   assert(m != 0);
27 
28   const float vmin = params->scalar.min;
29   const float vmax = params->scalar.max;
30   size_t i = m;
31   while (i >= 8) {
32     const float*restrict w = weights;
33     const int32_t* dmap = widx_dmap;
34     const uint32_t* nnzmap = nidx_nnzmap;
35     size_t j = n;
36     while (j >= 1) {
37       uint32_t nnz = *nnzmap++;
38       float vacc0x0 = *w++;
39       float vacc1x0 = vacc0x0;
40       float vacc2x0 = vacc0x0;
41       float vacc3x0 = vacc0x0;
42       float vacc4x0 = vacc0x0;
43       float vacc5x0 = vacc0x0;
44       float vacc6x0 = vacc0x0;
45       float vacc7x0 = vacc0x0;
46       if XNN_LIKELY(nnz != 0) {
47         do {
48           const intptr_t diff = *dmap++;
49           const float va0 = a[0];
50           const float va1 = a[1];
51           const float va2 = a[2];
52           const float va3 = a[3];
53           const float va4 = a[4];
54           const float va5 = a[5];
55           const float va6 = a[6];
56           const float va7 = a[7];
57           a = (const float*restrict) ((uintptr_t) a + (uintptr_t) diff);
58           const float vb0 = *w++;
59           vacc0x0 += va0 * vb0;
60           vacc1x0 += va1 * vb0;
61           vacc2x0 += va2 * vb0;
62           vacc3x0 += va3 * vb0;
63           vacc4x0 += va4 * vb0;
64           vacc5x0 += va5 * vb0;
65           vacc6x0 += va6 * vb0;
66           vacc7x0 += va7 * vb0;
67         } while (--nnz != 0);
68       }
69       float vout0x0 = math_min_f32(vacc0x0, vmax);
70       float vout1x0 = math_min_f32(vacc1x0, vmax);
71       float vout2x0 = math_min_f32(vacc2x0, vmax);
72       float vout3x0 = math_min_f32(vacc3x0, vmax);
73       float vout4x0 = math_min_f32(vacc4x0, vmax);
74       float vout5x0 = math_min_f32(vacc5x0, vmax);
75       float vout6x0 = math_min_f32(vacc6x0, vmax);
76       float vout7x0 = math_min_f32(vacc7x0, vmax);
77       vout0x0 = math_max_f32(vout0x0, vmin);
78       vout1x0 = math_max_f32(vout1x0, vmin);
79       vout2x0 = math_max_f32(vout2x0, vmin);
80       vout3x0 = math_max_f32(vout3x0, vmin);
81       vout4x0 = math_max_f32(vout4x0, vmin);
82       vout5x0 = math_max_f32(vout5x0, vmin);
83       vout6x0 = math_max_f32(vout6x0, vmin);
84       vout7x0 = math_max_f32(vout7x0, vmin);
85       c[0 * m + 0] = vout0x0;
86       c[0 * m + 1] = vout1x0;
87       c[0 * m + 2] = vout2x0;
88       c[0 * m + 3] = vout3x0;
89       c[0 * m + 4] = vout4x0;
90       c[0 * m + 5] = vout5x0;
91       c[0 * m + 6] = vout6x0;
92       c[0 * m + 7] = vout7x0;
93       c += 1 * m;
94       j -= 1;
95     }
96     if XNN_UNLIKELY(j != 0) {
97       do {
98         uint32_t nnz = *nnzmap++;
99         float vacc0 = *w++;
100         float vacc1 = vacc0;
101         float vacc2 = vacc0;
102         float vacc3 = vacc0;
103         float vacc4 = vacc0;
104         float vacc5 = vacc0;
105         float vacc6 = vacc0;
106         float vacc7 = vacc0;
107         if XNN_LIKELY(nnz != 0) {
108           do {
109             const intptr_t diff = *dmap++;
110             const float va0 = a[0];
111             const float va1 = a[1];
112             const float va2 = a[2];
113             const float va3 = a[3];
114             const float va4 = a[4];
115             const float va5 = a[5];
116             const float va6 = a[6];
117             const float va7 = a[7];
118             a = (const float*restrict) ((uintptr_t) a + (uintptr_t) diff);
119             const float vb = *w++;
120             vacc0 += va0 * vb;
121             vacc1 += va1 * vb;
122             vacc2 += va2 * vb;
123             vacc3 += va3 * vb;
124             vacc4 += va4 * vb;
125             vacc5 += va5 * vb;
126             vacc6 += va6 * vb;
127             vacc7 += va7 * vb;
128           } while (--nnz != 0);
129         }
130         float vout0 = math_min_f32(vacc0, vmax);
131         float vout1 = math_min_f32(vacc1, vmax);
132         float vout2 = math_min_f32(vacc2, vmax);
133         float vout3 = math_min_f32(vacc3, vmax);
134         float vout4 = math_min_f32(vacc4, vmax);
135         float vout5 = math_min_f32(vacc5, vmax);
136         float vout6 = math_min_f32(vacc6, vmax);
137         float vout7 = math_min_f32(vacc7, vmax);
138         vout0 = math_max_f32(vout0, vmin);
139         vout1 = math_max_f32(vout1, vmin);
140         vout2 = math_max_f32(vout2, vmin);
141         vout3 = math_max_f32(vout3, vmin);
142         vout4 = math_max_f32(vout4, vmin);
143         vout5 = math_max_f32(vout5, vmin);
144         vout6 = math_max_f32(vout6, vmin);
145         vout7 = math_max_f32(vout7, vmin);
146         c[0] = vout0;
147         c[1] = vout1;
148         c[2] = vout2;
149         c[3] = vout3;
150         c[4] = vout4;
151         c[5] = vout5;
152         c[6] = vout6;
153         c[7] = vout7;
154         c += m;
155         j -= 1;
156       } while (j != 0);
157     }
158     c -= m * n;
159     c += 8;
160     a += 8;
161     i -= 8;
162   }
163   if XNN_UNLIKELY(i != 0) {
164     if (i & 4) {
165       const float*restrict w = weights;
166       const int32_t* dmap = widx_dmap;
167       const uint32_t* nnzmap = nidx_nnzmap;
168       size_t j = n;
169       while (j >= 1) {
170         uint32_t nnz = *nnzmap++;
171         float vacc0x0 = *w++;
172         float vacc1x0 = vacc0x0;
173         float vacc2x0 = vacc0x0;
174         float vacc3x0 = vacc0x0;
175         if XNN_LIKELY(nnz != 0) {
176           do {
177             const intptr_t diff = *dmap++;
178             const float va0 = a[0];
179             const float va1 = a[1];
180             const float va2 = a[2];
181             const float va3 = a[3];
182             a = (const float*restrict) ((uintptr_t) a + (uintptr_t) diff);
183             const float vb0 = *w++;
184             vacc0x0 += va0 * vb0;
185             vacc1x0 += va1 * vb0;
186             vacc2x0 += va2 * vb0;
187             vacc3x0 += va3 * vb0;
188           } while (--nnz != 0);
189         }
190         float vout0x0 = math_min_f32(vacc0x0, vmax);
191         float vout1x0 = math_min_f32(vacc1x0, vmax);
192         float vout2x0 = math_min_f32(vacc2x0, vmax);
193         float vout3x0 = math_min_f32(vacc3x0, vmax);
194         vout0x0 = math_max_f32(vout0x0, vmin);
195         vout1x0 = math_max_f32(vout1x0, vmin);
196         vout2x0 = math_max_f32(vout2x0, vmin);
197         vout3x0 = math_max_f32(vout3x0, vmin);
198         c[0 * m + 0] = vout0x0;
199         c[0 * m + 1] = vout1x0;
200         c[0 * m + 2] = vout2x0;
201         c[0 * m + 3] = vout3x0;
202         c += 1 * m;
203         j -= 1;
204       }
205       if XNN_UNLIKELY(j != 0) {
206         do {
207           uint32_t nnz = *nnzmap++;
208           float vacc0 = *w++;
209           float vacc1 = vacc0;
210           float vacc2 = vacc0;
211           float vacc3 = vacc0;
212           if XNN_LIKELY(nnz != 0) {
213             do {
214               const intptr_t diff = *dmap++;
215               const float va0 = a[0];
216               const float va1 = a[1];
217               const float va2 = a[2];
218               const float va3 = a[3];
219               a = (const float*restrict) ((uintptr_t) a + (uintptr_t) diff);
220               const float vb = *w++;
221               vacc0 += va0 * vb;
222               vacc1 += va1 * vb;
223               vacc2 += va2 * vb;
224               vacc3 += va3 * vb;
225             } while (--nnz != 0);
226           }
227           float vout0 = math_min_f32(vacc0, vmax);
228           float vout1 = math_min_f32(vacc1, vmax);
229           float vout2 = math_min_f32(vacc2, vmax);
230           float vout3 = math_min_f32(vacc3, vmax);
231           vout0 = math_max_f32(vout0, vmin);
232           vout1 = math_max_f32(vout1, vmin);
233           vout2 = math_max_f32(vout2, vmin);
234           vout3 = math_max_f32(vout3, vmin);
235           c[0] = vout0;
236           c[1] = vout1;
237           c[2] = vout2;
238           c[3] = vout3;
239           c += m;
240           j -= 1;
241         } while (j != 0);
242       }
243       c -= m * n;
244       c += 4;
245       a += 4;
246     }
247     if (i & 2) {
248       const float*restrict w = weights;
249       const int32_t* dmap = widx_dmap;
250       const uint32_t* nnzmap = nidx_nnzmap;
251       size_t j = n;
252       while (j >= 1) {
253         uint32_t nnz = *nnzmap++;
254         float vacc0x0 = *w++;
255         float vacc1x0 = vacc0x0;
256         if XNN_LIKELY(nnz != 0) {
257           do {
258             const intptr_t diff = *dmap++;
259             const float va0 = a[0];
260             const float va1 = a[1];
261             a = (const float*restrict) ((uintptr_t) a + (uintptr_t) diff);
262             const float vb0 = *w++;
263             vacc0x0 += va0 * vb0;
264             vacc1x0 += va1 * vb0;
265           } while (--nnz != 0);
266         }
267         float vout0x0 = math_min_f32(vacc0x0, vmax);
268         float vout1x0 = math_min_f32(vacc1x0, vmax);
269         vout0x0 = math_max_f32(vout0x0, vmin);
270         vout1x0 = math_max_f32(vout1x0, vmin);
271         c[0 * m + 0] = vout0x0;
272         c[0 * m + 1] = vout1x0;
273         c += 1 * m;
274         j -= 1;
275       }
276       if XNN_UNLIKELY(j != 0) {
277         do {
278           uint32_t nnz = *nnzmap++;
279           float vacc0 = *w++;
280           float vacc1 = vacc0;
281           if XNN_LIKELY(nnz != 0) {
282             do {
283               const intptr_t diff = *dmap++;
284               const float va0 = a[0];
285               const float va1 = a[1];
286               a = (const float*restrict) ((uintptr_t) a + (uintptr_t) diff);
287               const float vb = *w++;
288               vacc0 += va0 * vb;
289               vacc1 += va1 * vb;
290             } while (--nnz != 0);
291           }
292           float vout0 = math_min_f32(vacc0, vmax);
293           float vout1 = math_min_f32(vacc1, vmax);
294           vout0 = math_max_f32(vout0, vmin);
295           vout1 = math_max_f32(vout1, vmin);
296           c[0] = vout0;
297           c[1] = vout1;
298           c += m;
299           j -= 1;
300         } while (j != 0);
301       }
302       c -= m * n;
303       c += 2;
304       a += 2;
305     }
306     if (i & 1) {
307       const float*restrict w = weights;
308       const int32_t* dmap = widx_dmap;
309       const uint32_t* nnzmap = nidx_nnzmap;
310       size_t j = n;
311       while (j >= 1) {
312         uint32_t nnz = *nnzmap++;
313         float vacc0x0 = *w++;
314         if XNN_LIKELY(nnz != 0) {
315           do {
316             const intptr_t diff = *dmap++;
317             const float va0 = a[0];
318             a = (const float*restrict) ((uintptr_t) a + (uintptr_t) diff);
319             const float vb0 = *w++;
320             vacc0x0 += va0 * vb0;
321           } while (--nnz != 0);
322         }
323         float vout0x0 = math_min_f32(vacc0x0, vmax);
324         vout0x0 = math_max_f32(vout0x0, vmin);
325         c[0 * m + 0] = vout0x0;
326         c += 1 * m;
327         j -= 1;
328       }
329       if XNN_UNLIKELY(j != 0) {
330         do {
331           uint32_t nnz = *nnzmap++;
332           float vacc0 = *w++;
333           if XNN_LIKELY(nnz != 0) {
334             do {
335               const intptr_t diff = *dmap++;
336               const float va0 = a[0];
337               a = (const float*restrict) ((uintptr_t) a + (uintptr_t) diff);
338               const float vb = *w++;
339               vacc0 += va0 * vb;
340             } while (--nnz != 0);
341           }
342           float vout0 = math_min_f32(vacc0, vmax);
343           vout0 = math_max_f32(vout0, vmin);
344           c[0] = vout0;
345           c += m;
346           j -= 1;
347         } while (j != 0);
348       }
349       c -= m * n;
350       c += 1;
351       a += 1;
352     }
353   }
354 }
355