• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// Copyright 2019 Google LLC
2//
3// This source code is licensed under the BSD-style license found in the
4// LICENSE file in the root directory of this source tree.
5
6$ABC = "0123456789ABCDEFGHIJK"
7#include <assert.h>
8
9#include <xnnpack/math.h>
10#include <xnnpack/spmm.h>
11
12
// Template for a scalar f32 sparse-weights-times-dense-input micro-kernel.
// The generator substitutes MR (elements of a and c processed per outer
// pass) and NR (outputs along n processed per inner pass). In the code shown
// here UNROLL only changes the emitted function name.
//
// Parameters, as used by the body below:
//   m            - extent of the outer loop; also the distance, in floats,
//                  between outputs for consecutive positions along n
//                  (c is written as c[N * m + M]). Must be non-zero.
//   n            - number of outputs along n produced for every element of m.
//   a            - dense input; MR consecutive floats are read per nonzero,
//                  then the pointer is moved by an offset from widx_dmap.
//   weights      - read sequentially: per output (or group of NR outputs)
//                  the initial accumulator value(s), followed by one value
//                  per output for every nonzero. The initial values are
//                  presumably biases -- confirm against the packing code.
//   widx_dmap    - one signed offset per nonzero, added to the address of a
//                  as a byte count.
//   nidx_nnzmap  - nonzero counts: one entry per group of NR outputs in the
//                  main n-loop and one entry per output in the n-tail.
//   c            - output buffer.
//   params       - supplies scalar.min and scalar.max, the clamping bounds.
13void xnn_f32_spmm_ukernel_${MR}x${NR}__scalar${"_unroll" + str(UNROLL) if UNROLL > 1 else ""}(
14    uint32_t m,
15    uint32_t n,
16    const float*restrict a,
17    const float*restrict weights,
18    const int32_t*restrict widx_dmap,
19    const uint32_t*restrict nidx_nnzmap,
20    float*restrict c,
21    const union xnn_f32_output_params params[restrict static 1])
22{
23  assert(m != 0);
24
25  const float vmin = params->scalar.min;
26  const float vmax = params->scalar.max;
  // Main loop: full blocks of MR elements along m.
27  size_t i = m;
28  while (i >= ${MR}) {
    // Every block along m re-reads the three sparse streams from their start.
29    const float*restrict w = weights;
30    const int32_t* dmap = widx_dmap;
31    const uint32_t* nnzmap = nidx_nnzmap;
32    size_t j = n;
33    while (j >= ${NR}) {
34      uint32_t nnz = *nnzmap++;
      // Each of the NR outputs starts from its own leading weight value,
      // which is shared by all MR elements of the block.
35      $for N in range(0, NR, 1):
36        float vacc0x${N} = *w++;
37        $for M in range(1, MR):
38          float vacc${ABC[M]}x${N} = vacc0x${N};
39      if XNN_LIKELY(nnz != 0) {
40        do {
          // Read the MR inputs at the current position first, then move a by
          // diff bytes so it points at the inputs of the next nonzero.
41          const intptr_t diff = *dmap++;
42          $for M in range(MR):
43            const float va${ABC[M]} = a[${M}];
44          a = (const float*restrict) ((uintptr_t) a + (uintptr_t) diff);
45          $for N in range(0, NR, 1):
46            const float vb${N} = *w++;
47          $for N in range(0, NR, 1):
48            $for M in range(MR):
49              vacc${ABC[M]}x${N} += va${ABC[M]} * vb${N};
50        } while (--nnz != 0);
51      }
      // Clamp: apply the upper bound first, then the lower bound.
52      $for N in range(NR):
53        $for M in range(MR):
54          float vout${ABC[M]}x${N} = math_min_f32(vacc${ABC[M]}x${N}, vmax);
55      $for N in range(NR):
56        $for M in range(MR):
57          vout${ABC[M]}x${N} = math_max_f32(vout${ABC[M]}x${N}, vmin);
58      $for N in range(NR):
59        $for M in range(MR):
60          c[${N} * m + ${M}] = vout${ABC[M]}x${N};
      // Step past the NR outputs just written (m floats apart each).
61      c += ${NR} * m;
62      j -= ${NR};
63    }
    // Tail along n: fewer than NR outputs remain, produce them one at a time.
64    if XNN_UNLIKELY(j != 0) {
65      do {
66        uint32_t nnz = *nnzmap++;
67        float vacc0 = *w++;
68        $for M in range(1, MR):
69          float vacc${ABC[M]} = vacc0;
70        if XNN_LIKELY(nnz != 0) {
71          do {
72            const intptr_t diff = *dmap++;
73            $for M in range(MR):
74              const float va${ABC[M]} = a[${M}];
75            a = (const float*restrict) ((uintptr_t) a + (uintptr_t) diff);
76            const float vb = *w++;
77            $for M in range(MR):
78              vacc${ABC[M]} += va${ABC[M]} * vb;
79          } while (--nnz != 0);
80        }
81        $for M in range(MR):
82          float vout${ABC[M]} = math_min_f32(vacc${ABC[M]}, vmax);
83        $for M in range(MR):
84          vout${ABC[M]} = math_max_f32(vout${ABC[M]}, vmin);
85        $for M in range(MR):
86          c[${M}] = vout${ABC[M]};
87        c += m;
88        j -= 1;
89      } while (j != 0);
90    }
    // c moved forward by m floats for each of the n outputs; rewind it and
    // advance to the next block along m.
    // NOTE(review): m and n are both uint32_t, so where uint32_t is unsigned
    // int the product m * n is computed in 32 bits and would wrap for very
    // large outputs -- confirm callers never reach that size.
91    c -= m * n;
92    c += ${MR};
    // NOTE(review): this increment presumably relies on the widx_dmap
    // offsets of a full pass bringing a back to where the pass started --
    // confirm against the code that builds widx_dmap.
93    a += ${MR};
94    i -= ${MR};
95  }
96  if XNN_UNLIKELY(i != 0) {
    // Fewer than MR elements remain along m. They are handled in power-of-two
    // sub-blocks, largest first, one sub-block for every bit set in i.
97    $for LOG2M in reversed(range((MR - 1).bit_length())):
98      $SUBMR = 1 << LOG2M
99      if (i & ${SUBMR}) {
        // Same structure as the main loop with SUBMR elements per block.
100        const float*restrict w = weights;
101        const int32_t* dmap = widx_dmap;
102        const uint32_t* nnzmap = nidx_nnzmap;
103        size_t j = n;
104        while (j >= ${NR}) {
105          uint32_t nnz = *nnzmap++;
106          $for N in range(0, NR, 1):
107            float vacc0x${N} = *w++;
108            $for M in range(1, SUBMR):
109              float vacc${ABC[M]}x${N} = vacc0x${N};
110          if XNN_LIKELY(nnz != 0) {
111            do {
112              const intptr_t diff = *dmap++;
113              $for M in range(SUBMR):
114                const float va${ABC[M]} = a[${M}];
115              a = (const float*restrict) ((uintptr_t) a + (uintptr_t) diff);
116              $for N in range(0, NR, 1):
117                const float vb${N} = *w++;
118              $for N in range(0, NR, 1):
119                $for M in range(SUBMR):
120                  vacc${ABC[M]}x${N} += va${ABC[M]} * vb${N};
121            } while (--nnz != 0);
122          }
123          $for N in range(0, NR, 1):
124            $for M in range(SUBMR):
125              float vout${ABC[M]}x${N} = math_min_f32(vacc${ABC[M]}x${N}, vmax);
126          $for N in range(0, NR, 1):
127            $for M in range(SUBMR):
128              vout${ABC[M]}x${N} = math_max_f32(vout${ABC[M]}x${N}, vmin);
129          $for N in range(0, NR, 1):
130            $for M in range(SUBMR):
131              c[${N} * m + ${M}] = vout${ABC[M]}x${N};
132          c += ${NR} * m;
133          j -= ${NR};
134        }
135        if XNN_UNLIKELY(j != 0) {
136          do {
137            uint32_t nnz = *nnzmap++;
138            float vacc0 = *w++;
139            $for M in range(1, SUBMR):
140              float vacc${ABC[M]} = vacc0;
141            if XNN_LIKELY(nnz != 0) {
142              do {
143                const intptr_t diff = *dmap++;
144                $for M in range(SUBMR):
145                  const float va${ABC[M]} = a[${M}];
146                a = (const float*restrict) ((uintptr_t) a + (uintptr_t) diff);
147                const float vb = *w++;
148                $for M in range(SUBMR):
149                  vacc${ABC[M]} += va${ABC[M]} * vb;
150              } while (--nnz != 0);
151            }
152            $for M in range(SUBMR):
153              float vout${ABC[M]} = math_min_f32(vacc${ABC[M]}, vmax);
154            $for M in range(SUBMR):
155              vout${ABC[M]} = math_max_f32(vout${ABC[M]}, vmin);
156            $for M in range(SUBMR):
157              c[${M}] = vout${ABC[M]};
158            c += m;
159            j -= 1;
160          } while (j != 0);
161        }
162        c -= m * n;
163        c += ${SUBMR};
164        a += ${SUBMR};
165      }
166  }
167}
168