1// Copyright 2019 Google LLC 2// 3// This source code is licensed under the BSD-style license found in the 4// LICENSE file in the root directory of this source tree. 5 6#include <assert.h> 7 8#include <xnnpack/math.h> 9#include <xnnpack/ppmm.h> 10 11 12void xnn_f32_ppmm_ukernel_${MR}x${NR}__scalar( 13 size_t mr, 14 size_t nc, 15 size_t kc, 16 const float*restrict a, 17 const float*restrict w, 18 float*restrict c, 19 size_t cm_stride, 20 size_t cn_stride, 21 const union xnn_f32_output_params params[restrict static 1]) 22{ 23 assert(mr != 0); 24 assert(mr <= ${MR}); 25 assert(nc != 0); 26 assert(kc != 0); 27 assert(kc % sizeof(float) == 0); 28 29 float* c0 = c; 30 $for M in range(1, MR): 31 float* c${M} = (float*) ((uintptr_t) c${M-1} + cm_stride); 32 $if M % 2 == 0: 33 if XNN_UNPREDICTABLE(mr <= ${M}) { 34 c${M} = c${M-1}; 35 } 36 $elif M + 1 == MR: 37 if XNN_UNPREDICTABLE(mr != ${M+1}) { 38 c${M} = c${M-1}; 39 } 40 $else: 41 if XNN_UNPREDICTABLE(mr < ${M+1}) { 42 c${M} = c${M-1}; 43 } 44 45 do { 46 $for N in range(NR): 47 float vacc0x${N} = w[${N}]; 48 $for M in range(1, MR): 49 $for N in range(NR): 50 float vacc${M}x${N} = vacc0x${N}; 51 w += ${NR}; 52 53 size_t k = kc; 54 do { 55 $for M in range(MR): 56 const float va${M} = a[${M}]; 57 a += ${MR}; 58 59 $for N in range(NR): 60 const float vb${N} = w[${N}]; 61 w += ${NR}; 62 63 $for N in range(NR): 64 $for M in range(MR): 65 vacc${M}x${N} += va${M} * vb${N}; 66 67 k -= sizeof(float); 68 } while (k != 0); 69 70 const float vmax = params->scalar.max; 71 $for N in range(NR): 72 $for M in range(MR): 73 vacc${M}x${N} = math_min_f32(vacc${M}x${N}, vmax); 74 75 const float vmin = params->scalar.min; 76 $for N in range(NR): 77 $for M in range(MR): 78 vacc${M}x${N} = math_max_f32(vacc${M}x${N}, vmin); 79 80 if XNN_LIKELY(nc >= ${NR}) { 81 $for M in reversed(range(MR)): 82 $for N in range(NR): 83 c${M}[${N}] = vacc${M}x${N}; 84 85 a = (const float*) ((uintptr_t) a - kc * ${MR}); 86 87 $for M in reversed(range(MR)): 88 c${M} = (float*) ((uintptr_t) c${M} + cn_stride); 89 90 nc -= ${NR}; 91 } else { 92 $for LOG2N in reversed(range(NR.bit_length())): 93 $if NR != 1 << LOG2N: 94 if (nc & ${1 << LOG2N}) { 95 $if LOG2N != 0: 96 $for M in reversed(range(MR)): 97 $for N in range(1 << LOG2N): 98 c${M}[${N}] = vacc${M}x${N}; 99 100 $for M in reversed(range(MR)): 101 $for N in range(1 << (LOG2N - 1)): 102 vacc${M}x${N} = vacc${M}x${N + (1 << LOG2N)}; 103 104 $for M in reversed(range(MR)): 105 c${M} += ${1 << LOG2N}; 106 $else: 107 $for M in reversed(range(MR)): 108 *c${M} = vacc${M}x0; 109 } 110 111 nc = 0; 112 } 113 } while (nc != 0); 114} 115