1// Copyright 2019 Google LLC 2// 3// This source code is licensed under the BSD-style license found in the 4// LICENSE file in the root directory of this source tree. 5 6$ABC = "0123456789ABCDEFGHIJK" 7#include <assert.h> 8 9#include <xnnpack/math.h> 10#include <xnnpack/spmm.h> 11 12 13void xnn_f32_spmm_ukernel_${MR}x${NR}__scalar${"_unroll" + str(UNROLL) if UNROLL > 1 else ""}( 14 uint32_t m, 15 uint32_t n, 16 const float*restrict a, 17 const float*restrict weights, 18 const int32_t*restrict widx_dmap, 19 const uint32_t*restrict nidx_nnzmap, 20 float*restrict c, 21 const union xnn_f32_output_params params[restrict static 1]) 22{ 23 assert(m != 0); 24 25 const float vmin = params->scalar.min; 26 const float vmax = params->scalar.max; 27 size_t i = m; 28 while (i >= ${MR}) { 29 const float*restrict w = weights; 30 const int32_t* dmap = widx_dmap; 31 const uint32_t* nnzmap = nidx_nnzmap; 32 size_t j = n; 33 while (j >= ${NR}) { 34 uint32_t nnz = *nnzmap++; 35 $for N in range(0, NR, 1): 36 float vacc0x${N} = *w++; 37 $for M in range(1, MR): 38 float vacc${ABC[M]}x${N} = vacc0x${N}; 39 if XNN_LIKELY(nnz != 0) { 40 do { 41 const intptr_t diff = *dmap++; 42 $for M in range(MR): 43 const float va${ABC[M]} = a[${M}]; 44 a = (const float*restrict) ((uintptr_t) a + (uintptr_t) diff); 45 $for N in range(0, NR, 1): 46 const float vb${N} = *w++; 47 $for N in range(0, NR, 1): 48 $for M in range(MR): 49 vacc${ABC[M]}x${N} += va${ABC[M]} * vb${N}; 50 } while (--nnz != 0); 51 } 52 $for N in range(NR): 53 $for M in range(MR): 54 float vout${ABC[M]}x${N} = math_min_f32(vacc${ABC[M]}x${N}, vmax); 55 $for N in range(NR): 56 $for M in range(MR): 57 vout${ABC[M]}x${N} = math_max_f32(vout${ABC[M]}x${N}, vmin); 58 $for N in range(NR): 59 $for M in range(MR): 60 c[${N} * m + ${M}] = vout${ABC[M]}x${N}; 61 c += ${NR} * m; 62 j -= ${NR}; 63 } 64 if XNN_UNLIKELY(j != 0) { 65 do { 66 uint32_t nnz = *nnzmap++; 67 float vacc0 = *w++; 68 $for M in range(1, MR): 69 float vacc${ABC[M]} = vacc0; 70 if XNN_LIKELY(nnz != 0) { 71 do { 72 const intptr_t diff = *dmap++; 73 $for M in range(MR): 74 const float va${ABC[M]} = a[${M}]; 75 a = (const float*restrict) ((uintptr_t) a + (uintptr_t) diff); 76 const float vb = *w++; 77 $for M in range(MR): 78 vacc${ABC[M]} += va${ABC[M]} * vb; 79 } while (--nnz != 0); 80 } 81 $for M in range(MR): 82 float vout${ABC[M]} = math_min_f32(vacc${ABC[M]}, vmax); 83 $for M in range(MR): 84 vout${ABC[M]} = math_max_f32(vout${ABC[M]}, vmin); 85 $for M in range(MR): 86 c[${M}] = vout${ABC[M]}; 87 c += m; 88 j -= 1; 89 } while (j != 0); 90 } 91 c -= m * n; 92 c += ${MR}; 93 a += ${MR}; 94 i -= ${MR}; 95 } 96 if XNN_UNLIKELY(i != 0) { 97 $for LOG2M in reversed(range((MR - 1).bit_length())): 98 $SUBMR = 1 << LOG2M 99 if (i & ${SUBMR}) { 100 const float*restrict w = weights; 101 const int32_t* dmap = widx_dmap; 102 const uint32_t* nnzmap = nidx_nnzmap; 103 size_t j = n; 104 while (j >= ${NR}) { 105 uint32_t nnz = *nnzmap++; 106 $for N in range(0, NR, 1): 107 float vacc0x${N} = *w++; 108 $for M in range(1, SUBMR): 109 float vacc${ABC[M]}x${N} = vacc0x${N}; 110 if XNN_LIKELY(nnz != 0) { 111 do { 112 const intptr_t diff = *dmap++; 113 $for M in range(SUBMR): 114 const float va${ABC[M]} = a[${M}]; 115 a = (const float*restrict) ((uintptr_t) a + (uintptr_t) diff); 116 $for N in range(0, NR, 1): 117 const float vb${N} = *w++; 118 $for N in range(0, NR, 1): 119 $for M in range(SUBMR): 120 vacc${ABC[M]}x${N} += va${ABC[M]} * vb${N}; 121 } while (--nnz != 0); 122 } 123 $for N in range(0, NR, 1): 124 $for M in range(SUBMR): 125 float vout${ABC[M]}x${N} = math_min_f32(vacc${ABC[M]}x${N}, vmax); 126 $for N in range(0, NR, 1): 127 $for M in range(SUBMR): 128 vout${ABC[M]}x${N} = math_max_f32(vout${ABC[M]}x${N}, vmin); 129 $for N in range(0, NR, 1): 130 $for M in range(SUBMR): 131 c[${N} * m + ${M}] = vout${ABC[M]}x${N}; 132 c += ${NR} * m; 133 j -= ${NR}; 134 } 135 if XNN_UNLIKELY(j != 0) { 136 do { 137 uint32_t nnz = *nnzmap++; 138 float vacc0 = *w++; 139 $for M in range(1, SUBMR): 140 float vacc${ABC[M]} = vacc0; 141 if XNN_LIKELY(nnz != 0) { 142 do { 143 const intptr_t diff = *dmap++; 144 $for M in range(SUBMR): 145 const float va${ABC[M]} = a[${M}]; 146 a = (const float*restrict) ((uintptr_t) a + (uintptr_t) diff); 147 const float vb = *w++; 148 $for M in range(SUBMR): 149 vacc${ABC[M]} += va${ABC[M]} * vb; 150 } while (--nnz != 0); 151 } 152 $for M in range(SUBMR): 153 float vout${ABC[M]} = math_min_f32(vacc${ABC[M]}, vmax); 154 $for M in range(SUBMR): 155 vout${ABC[M]} = math_max_f32(vout${ABC[M]}, vmin); 156 $for M in range(SUBMR): 157 c[${M}] = vout${ABC[M]}; 158 c += m; 159 j -= 1; 160 } while (j != 0); 161 } 162 c -= m * n; 163 c += ${SUBMR}; 164 a += ${SUBMR}; 165 } 166 } 167} 168