• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
// Auto-generated file. Do not edit!
//   Template: src/f32-spmm/scalar-pipelined.c.in
//   Generator: tools/xngen
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
9 
10 #include <assert.h>
11 
12 #include <xnnpack/math.h>
13 #include <xnnpack/spmm.h>
14 
15 
xnn_f32_spmm_minmax_ukernel_4x1__scalar_pipelined(size_t mc,size_t nc,const float * restrict input,const float * restrict weights,const int32_t * restrict widx_dmap,const uint32_t * restrict nidx_nnzmap,float * restrict output,size_t output_stride,const union xnn_f32_minmax_params params[restrict XNN_MIN_ELEMENTS (1)])16 void xnn_f32_spmm_minmax_ukernel_4x1__scalar_pipelined(
17     size_t mc,
18     size_t nc,
19     const float*restrict input,
20     const float*restrict weights,
21     const int32_t*restrict widx_dmap,
22     const uint32_t*restrict nidx_nnzmap,
23     float*restrict output,
24     size_t output_stride,
25     const union xnn_f32_minmax_params params[restrict XNN_MIN_ELEMENTS(1)])
26 {
27   assert(mc != 0);
28   assert(mc % sizeof(float) == 0);
29   assert(nc != 0);
30 
31   const float vmin = params->scalar.min;
32   const float vmax = params->scalar.max;
33   size_t output_decrement = output_stride * nc - 4 * sizeof(float);
34   while XNN_LIKELY(mc >= 4 * sizeof(float)) {
35     const float*restrict w = weights;
36     const int32_t* dmap = widx_dmap;
37     const uint32_t* nnzmap = nidx_nnzmap;
38     float vw = *w++;
39     intptr_t diff = *dmap++;
40     float vi0 = input[0];
41     float vi1 = input[1];
42     float vi2 = input[2];
43     float vi3 = input[3];
44     size_t n = nc;
45     do {
46       uint32_t nnz = *nnzmap++;
47       float vacc0 = vw;
48       float vacc1 = vw;
49       float vacc2 = vw;
50       float vacc3 = vw;
51       vw = *w++;
52       if XNN_LIKELY(nnz != 0) {
53         do {
54           vacc0 += vi0 * vw;
55           vacc1 += vi1 * vw;
56           vacc2 += vi2 * vw;
57           vacc3 += vi3 * vw;
58           input = (const float*restrict) ((uintptr_t) input + (uintptr_t) diff);
59 
60           diff = *dmap++;
61           vw = *w++;
62           vi0 = input[0];
63           vi1 = input[1];
64           vi2 = input[2];
65           vi3 = input[3];
66         } while (--nnz != 0);
67       }
68       float vout0 = math_min_f32(vacc0, vmax);
69       float vout1 = math_min_f32(vacc1, vmax);
70       float vout2 = math_min_f32(vacc2, vmax);
71       float vout3 = math_min_f32(vacc3, vmax);
72       vout0 = math_max_f32(vout0, vmin);
73       vout1 = math_max_f32(vout1, vmin);
74       vout2 = math_max_f32(vout2, vmin);
75       vout3 = math_max_f32(vout3, vmin);
76       output[0] = vout0;
77       output[1] = vout1;
78       output[2] = vout2;
79       output[3] = vout3;
80       output = (float*restrict) ((uintptr_t) output + output_stride);
81     } while (--n != 0);
82     output = (float*restrict) ((uintptr_t) output - output_decrement);
83     input += 4;
84     mc -= 4 * sizeof(float);
85   }
86   if XNN_UNLIKELY(mc != 0) {
87     output_decrement += 2 * sizeof(float);
88     if (mc & (2 * sizeof(float))) {
89       const float*restrict w = weights;
90       const int32_t* dmap = widx_dmap;
91       const uint32_t* nnzmap = nidx_nnzmap;
92       float vw = *w++;
93       intptr_t diff = *dmap++;
94       float vi0 = input[0];
95       float vi1 = input[1];
96       size_t n = nc;
97       do {
98         uint32_t nnz = *nnzmap++;
99         float vacc0 = vw;
100         float vacc1 = vw;
101         vw = *w++;
102         if XNN_LIKELY(nnz != 0) {
103           do {
104             vacc0 += vi0 * vw;
105             vacc1 += vi1 * vw;
106             input = (const float*restrict) ((uintptr_t) input + (uintptr_t) diff);
107 
108             diff = *dmap++;
109             vw = *w++;
110             vi0 = input[0];
111             vi1 = input[1];
112           } while (--nnz != 0);
113         }
114         float vout0 = math_min_f32(vacc0, vmax);
115         float vout1 = math_min_f32(vacc1, vmax);
116         vout0 = math_max_f32(vout0, vmin);
117         vout1 = math_max_f32(vout1, vmin);
118         output[0] = vout0;
119         output[1] = vout1;
120         output = (float*restrict) ((uintptr_t) output + output_stride);
121       } while (--n != 0);
122       output = (float*restrict) ((uintptr_t) output - output_decrement);
123       input += 2;
124     }
125     output_decrement += 1 * sizeof(float);
126     if (mc & (1 * sizeof(float))) {
127       const float*restrict w = weights;
128       const int32_t* dmap = widx_dmap;
129       const uint32_t* nnzmap = nidx_nnzmap;
130       float vw = *w++;
131       intptr_t diff = *dmap++;
132       float vi0 = input[0];
133       size_t n = nc;
134       do {
135         uint32_t nnz = *nnzmap++;
136         float vacc0 = vw;
137         vw = *w++;
138         if XNN_LIKELY(nnz != 0) {
139           do {
140             vacc0 += vi0 * vw;
141             input = (const float*restrict) ((uintptr_t) input + (uintptr_t) diff);
142 
143             diff = *dmap++;
144             vw = *w++;
145             vi0 = input[0];
146           } while (--nnz != 0);
147         }
148         float vout0 = math_min_f32(vacc0, vmax);
149         vout0 = math_max_f32(vout0, vmin);
150         output[0] = vout0;
151         output = (float*restrict) ((uintptr_t) output + output_stride);
152       } while (--n != 0);
153       output = (float*restrict) ((uintptr_t) output - output_decrement);
154       input += 1;
155     }
156   }
157 }
158