1// Copyright 2019 Google LLC 2// 3// This source code is licensed under the BSD-style license found in the 4// LICENSE file in the root directory of this source tree. 5 6$assert CHANNEL_TILE >= 1 7$assert KERNEL_TILE >= 2 8$assert ACCUMULATORS >= 1 9$assert ACTIVATION in ["LINEAR", "MINMAX"] 10$ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" 11#include <assert.h> 12 13#include <xnnpack/dwconv.h> 14#include <xnnpack/math.h> 15 16 17$MIN_F32 = "__builtin_wasm_min_f32" if WASM else "math_min_f32" 18$MAX_F32 = "__builtin_wasm_max_f32" if WASM else "math_max_f32" 19$SUFFIX = {"LINEAR": "", "MINMAX": "_minmax"}[ACTIVATION] 20$PARAMS = {"LINEAR": "xnn_f32_default_params", "MINMAX": "xnn_f32_minmax_params"}[ACTIVATION] 21void xnn_f32_dwconv${SUFFIX}_ukernel_up${CHANNEL_TILE}x${KERNEL_TILE}__${"wasm" if WASM else "scalar"}${"" if ACCUMULATORS == 1 else "_acc%d" % ACCUMULATORS}( 22 size_t channels, 23 size_t output_width, 24 const float** input, 25 const float* weights, 26 float* output, 27 size_t input_stride, 28 size_t output_increment, 29 size_t input_offset, 30 const float* zero, 31 const union ${PARAMS} params[restrict XNN_MIN_ELEMENTS(1)]) 32{ 33 assert(channels != 0); 34 assert(output_width != 0); 35 36 $if ACTIVATION == "MINMAX": 37 const float vmin = params->scalar.min; 38 const float vmax = params->scalar.max; 39 do { 40 $for K in range(KERNEL_TILE): 41 const float* i${K} = input[${K}]; 42 assert(i${K} != NULL); 43 if XNN_UNPREDICTABLE(i${K} != zero) { 44 i${K} = (const float*) ((uintptr_t) i${K} + input_offset); 45 } 46 input = (const float**) ((uintptr_t) input + input_stride); 47 48 size_t c = channels; 49 const float* w = weights; 50 $if CHANNEL_TILE > 1: 51 for (; c >= ${CHANNEL_TILE}; c -= ${CHANNEL_TILE}) { 52 $for C in range(CHANNEL_TILE): 53 float vacc${C}p0 = w[${C}]; 54 55 $for K in range(KERNEL_TILE): 56 57 $for C in range(CHANNEL_TILE): 58 const float vi${K}x${C} = i${K}[${C}]; 59 i${K} += ${CHANNEL_TILE}; 60 61 $for C in range(CHANNEL_TILE): 62 const float vk${K}x${C} = w[${(K + 1) * CHANNEL_TILE + C}]; 63 $if 1 <= K < ACCUMULATORS: 64 float vacc${C}p${K} = vi${K}x${C} * vk${K}x${C}; 65 $else: 66 vacc${C}p${K % ACCUMULATORS} += vi${K}x${C} * vk${K}x${C}; 67 68 w += ${(KERNEL_TILE + 1) * CHANNEL_TILE}; 69 70 $if ACCUMULATORS > 1: 71 // Add up all accumulators to vacc${ABC[0:CHANNEL_TILE]}p0 72 $ACC_SLICE = 1 73 $while ACC_SLICE < ACCUMULATORS: 74 $for A in range(0, ACCUMULATORS, ACC_SLICE * 2): 75 $if A + ACC_SLICE < ACCUMULATORS: 76 $for C in range(CHANNEL_TILE): 77 vacc${C}p${A} = vacc${C}p${A} + vacc${C}p${A + ACC_SLICE}; 78 $ACC_SLICE *= 2 79 80 $if ACTIVATION == "MINMAX": 81 $for C in range(CHANNEL_TILE): 82 float vacc${C} = ${MAX_F32}(vacc${C}p0, vmin); 83 84 $for C in range(CHANNEL_TILE): 85 vacc${C} = ${MIN_F32}(vacc${C}, vmax); 86 87 $for C in range(CHANNEL_TILE): 88 output[${C}] = vacc${C}; 89 $else: 90 $for C in range(CHANNEL_TILE): 91 output[${C}] = vacc${C}p0; 92 output += ${CHANNEL_TILE}; 93 } 94 for (; c >= 1; c -= 1) { 95 float vacc0p0 = *w++; 96 97 $for K in range(KERNEL_TILE): 98 const float vi${K} = *i${K}++; 99 const float vk${K} = w[${(K + 1) * CHANNEL_TILE - 1}]; 100 $if 1 <= K < ACCUMULATORS: 101 float vacc0p${K} = vi${K} * vk${K}; 102 $else: 103 vacc0p${K % ACCUMULATORS} += vi${K} * vk${K}; 104 105 $if ACCUMULATORS > 1: 106 // Add up all accumulators to vacc${ABC[0:CHANNEL_TILE]}p0 107 $ACC_SLICE = 1 108 $while ACC_SLICE < ACCUMULATORS: 109 $for A in range(0, ACCUMULATORS, ACC_SLICE * 2): 110 $if A + ACC_SLICE < ACCUMULATORS: 111 vacc0p${A} = vacc0p${A} + vacc0p${A + ACC_SLICE}; 112 $ACC_SLICE *= 2 113 114 $if ACTIVATION == "MINMAX": 115 float vacc0 = ${MAX_F32}(vacc0p0, vmin); 116 vacc0 = ${MIN_F32}(vacc0, vmax); 117 *output++ = vacc0; 118 $else: 119 *output++ = vacc0p0; 120 } 121 $else: 122 do { 123 float vacc0p0 = w[0]; 124 $for K in range(KERNEL_TILE): 125 126 const float vi${K} = *i${K}++; 127 const float vk${K} = w[${K+1}]; 128 $if 1 <= K < ACCUMULATORS: 129 float vacc0p${K} = vi${K} * vk${K}; 130 $else: 131 vacc0p${K % ACCUMULATORS} += vi${K} * vk${K}; 132 133 w += ${KERNEL_TILE + 1}; 134 135 $ACC_STEP = 1 136 $while ACC_STEP < ACCUMULATORS: 137 $for A in range(0, ACCUMULATORS, ACC_STEP * 2): 138 $if A + ACC_STEP < ACCUMULATORS: 139 vacc0p${A} += vacc0p${A + ACC_STEP}; 140 $ACC_STEP *= 2 141 142 $if ACTIVATION == "MINMAX": 143 float vacc0 = ${MAX_F32}(vacc0p0, vmin); 144 vacc0 = ${MIN_F32}(vacc0, vmax); 145 *output++ = vacc0; 146 $else: 147 *output++ = vacc0p0; 148 } while (--c != 0); 149 150 output = (float*) ((uintptr_t) output + output_increment); 151 } while (--output_width != 0); 152} 153