// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

6$ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
7#include <assert.h>
8
9#include <xnnpack/math.h>
10#include <xnnpack/spmm.h>
11
12
13void xnn_f32_spmm_minmax_ukernel_${MR}x${NR}__scalar${"_x" + str(UNROLL) if UNROLL > 1 else ""}(
14    size_t mc,
15    size_t nc,
16    const float*restrict input,
17    const float*restrict weights,
18    const int32_t*restrict widx_dmap,
19    const uint32_t*restrict nidx_nnzmap,
20    float*restrict output,
21    size_t output_stride,
22    const union xnn_f32_minmax_params params[restrict XNN_MIN_ELEMENTS(1)])
23{
24  assert(mc != 0);
25  assert(mc % sizeof(float) == 0);
26  assert(nc != 0);
27
28  const float vmin = params->scalar.min;
29  const float vmax = params->scalar.max;
30  size_t output_decrement = output_stride * nc - ${MR} * sizeof(float);
31  while (mc >= ${MR} * sizeof(float)) {
32    const float*restrict w = weights;
33    const int32_t* dmap = widx_dmap;
34    const uint32_t* nnzmap = nidx_nnzmap;
35    size_t n = nc;
36    while (n >= ${NR}) {
37      uint32_t nnz = *nnzmap++;
38      $for N in range(0, NR, 1):
39        float vacc0x${N} = *w++;
40        $for M in range(1, MR):
41          float vacc${ABC[M]}x${N} = vacc0x${N};
42      if XNN_LIKELY(nnz != 0) {
43        do {
44          const intptr_t diff = *dmap++;
45          $for M in range(MR):
46            const float vi${ABC[M]} = input[${M}];
47          input = (const float*restrict) ((uintptr_t) input + (uintptr_t) diff);
48          $for N in range(0, NR, 1):
49            const float vw${N} = *w++;
50          $for N in range(0, NR, 1):
51            $for M in range(MR):
52              vacc${ABC[M]}x${N} += vi${ABC[M]} * vw${N};
53        } while (--nnz != 0);
54      }
55      $for N in range(NR):
56        $for M in range(MR):
57          float vout${ABC[M]}x${N} = math_min_f32(vacc${ABC[M]}x${N}, vmax);
58      $for N in range(NR):
59        $for M in range(MR):
60          vout${ABC[M]}x${N} = math_max_f32(vout${ABC[M]}x${N}, vmin);
61      $for M in range(MR):
62        output[${M}] = vout${ABC[M]}x${N};
63      $for N in range(NR):
64        $for M in range(MR):
65          output[${M}] = vout${ABC[M]}x${N};
66        output = (float*restrict) ((uintptr_t) output + output_stride);
67      n -= ${NR};
68    }
69    if XNN_UNLIKELY(n != 0) {
70      do {
71        uint32_t nnz = *nnzmap++;
72        float vacc0 = *w++;
73        $for M in range(1, MR):
74          float vacc${ABC[M]} = vacc0;
75        if XNN_LIKELY(nnz != 0) {
76          do {
77            const intptr_t diff = *dmap++;
78            $for M in range(MR):
79              const float vi${ABC[M]} = input[${M}];
80            input = (const float*restrict) ((uintptr_t) input + (uintptr_t) diff);
81            const float vw = *w++;
82            $for M in range(MR):
83              vacc${ABC[M]} += vi${ABC[M]} * vw;
84          } while (--nnz != 0);
85        }
86        $for M in range(MR):
87          float vout${ABC[M]} = math_min_f32(vacc${ABC[M]}, vmax);
88        $for M in range(MR):
89          vout${ABC[M]} = math_max_f32(vout${ABC[M]}, vmin);
90        $for M in range(MR):
91          output[${M}] = vout${ABC[M]};
92        output = (float*restrict) ((uintptr_t) output + output_stride);
93        n -= 1;
94      } while (n != 0);
95    }
96    output = (float*restrict) ((uintptr_t) output - output_decrement);
97    input += ${MR};
98    mc -= ${MR} * sizeof(float);
99  }
100  if XNN_UNLIKELY(mc != 0) {
101    $for LOG2M in reversed(range((MR - 1).bit_length())):
102      $SUBMR = 1 << LOG2M
103      $if SUBMR * 2 >= MR:
104        output_decrement += ${MR - SUBMR} * sizeof(float);
105      $else:
106        output_decrement += ${SUBMR} * sizeof(float);
107      if (mc & (${SUBMR} * sizeof(float))) {
108        const float*restrict w = weights;
109        const int32_t* dmap = widx_dmap;
110        const uint32_t* nnzmap = nidx_nnzmap;
111        size_t n = nc;
112        while (n >= ${NR}) {
113          uint32_t nnz = *nnzmap++;
114          $for N in range(0, NR, 1):
115            float vacc0x${N} = *w++;
116            $for M in range(1, SUBMR):
117              float vacc${ABC[M]}x${N} = vacc0x${N};
118          if XNN_LIKELY(nnz != 0) {
119            do {
120              const intptr_t diff = *dmap++;
121              $for M in range(SUBMR):
122                const float vi${ABC[M]} = input[${M}];
123              input = (const float*restrict) ((uintptr_t) input + (uintptr_t) diff);
124              $for N in range(0, NR, 1):
125                const float vw${N} = *w++;
126              $for N in range(0, NR, 1):
127                $for M in range(SUBMR):
128                  vacc${ABC[M]}x${N} += vi${ABC[M]} * vw${N};
129            } while (--nnz != 0);
130          }
131          $for N in range(0, NR, 1):
132            $for M in range(SUBMR):
133              float vout${ABC[M]}x${N} = math_min_f32(vacc${ABC[M]}x${N}, vmax);
134          $for N in range(0, NR, 1):
135            $for M in range(SUBMR):
136              vout${ABC[M]}x${N} = math_max_f32(vout${ABC[M]}x${N}, vmin);
137          $for N in range(NR):
138            $for M in range(SUBMR):
139              output[${M}] = vout${ABC[M]}x${N};
140            output = (float*restrict) ((uintptr_t) output + output_stride);
141          n -= ${NR};
142        }
143        if XNN_UNLIKELY(n != 0) {
144          do {
145            uint32_t nnz = *nnzmap++;
146            float vacc0 = *w++;
147            $for M in range(1, SUBMR):
148              float vacc${ABC[M]} = vacc0;
149            if XNN_LIKELY(nnz != 0) {
150              do {
151                const intptr_t diff = *dmap++;
152                $for M in range(SUBMR):
153                  const float vi${ABC[M]} = input[${M}];
154                input = (const float*restrict) ((uintptr_t) input + (uintptr_t) diff);
155                const float vw = *w++;
156                $for M in range(SUBMR):
157                  vacc${ABC[M]} += vi${ABC[M]} * vw;
158              } while (--nnz != 0);
159            }
160            $for M in range(SUBMR):
161              float vout${ABC[M]} = math_min_f32(vacc${ABC[M]}, vmax);
162            $for M in range(SUBMR):
163              vout${ABC[M]} = math_max_f32(vout${ABC[M]}, vmin);
164            $for M in range(SUBMR):
165              output[${M}] = vout${ABC[M]};
166            output = (float*restrict) ((uintptr_t) output + output_stride);
167            n -= 1;
168          } while (n != 0);
169        }
170        output = (float*restrict) ((uintptr_t) output - output_decrement);
171        input += ${SUBMR};
172      }
173  }
174}
175