// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

6$assert CHANNEL_TILE >= 1
7$assert KERNEL_TILE >= 2
8$assert ACCUMULATORS >= 1
9$assert ACTIVATION in ["LINEAR", "MINMAX"]
10$ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
11#include <assert.h>
12
13#include <xnnpack/dwconv.h>
14#include <xnnpack/math.h>
15
16
17$MIN_F32 = "__builtin_wasm_min_f32" if WASM else "math_min_f32"
18$MAX_F32 = "__builtin_wasm_max_f32" if WASM else "math_max_f32"
19$SUFFIX = {"LINEAR": "", "MINMAX": "_minmax"}[ACTIVATION]
20$PARAMS = {"LINEAR": "xnn_f32_default_params", "MINMAX": "xnn_f32_minmax_params"}[ACTIVATION]
21void xnn_f32_dwconv${SUFFIX}_ukernel_up${CHANNEL_TILE}x${KERNEL_TILE}__${"wasm" if WASM else "scalar"}${"" if ACCUMULATORS == 1 else "_acc%d" % ACCUMULATORS}(
22    size_t channels,
23    size_t output_width,
24    const float** input,
25    const float* weights,
26    float* output,
27    size_t input_stride,
28    size_t output_increment,
29    size_t input_offset,
30    const float* zero,
31    const union ${PARAMS} params[restrict XNN_MIN_ELEMENTS(1)])
32{
33  assert(channels != 0);
34  assert(output_width != 0);
35
36  $if ACTIVATION == "MINMAX":
37    const float vmin = params->scalar.min;
38    const float vmax = params->scalar.max;
39  do {
40    $for K in range(KERNEL_TILE):
41      const float* i${K} = input[${K}];
42      assert(i${K} != NULL);
43      if XNN_UNPREDICTABLE(i${K} != zero) {
44        i${K} = (const float*) ((uintptr_t) i${K} + input_offset);
45      }
46    input = (const float**) ((uintptr_t) input + input_stride);
47
48    size_t c = channels;
49    const float* w = weights;
50    $if CHANNEL_TILE > 1:
51      for (; c >= ${CHANNEL_TILE}; c -= ${CHANNEL_TILE}) {
52        $for C in range(CHANNEL_TILE):
53          float vacc${C}p0 = w[${C}];
54
55        $for K in range(KERNEL_TILE):
56
57          $for C in range(CHANNEL_TILE):
58            const float vi${K}x${C} = i${K}[${C}];
59          i${K} += ${CHANNEL_TILE};
60
61          $for C in range(CHANNEL_TILE):
62            const float vk${K}x${C} = w[${(K + 1) * CHANNEL_TILE + C}];
63            $if 1 <= K < ACCUMULATORS:
64              float vacc${C}p${K} = vi${K}x${C} * vk${K}x${C};
65            $else:
66              vacc${C}p${K % ACCUMULATORS} += vi${K}x${C} * vk${K}x${C};
67
68        w += ${(KERNEL_TILE + 1) * CHANNEL_TILE};
69
70        $if ACCUMULATORS > 1:
71          // Add up all accumulators to vacc${ABC[0:CHANNEL_TILE]}p0
72          $ACC_SLICE = 1
73          $while ACC_SLICE < ACCUMULATORS:
74            $for A in range(0, ACCUMULATORS, ACC_SLICE * 2):
75              $if A + ACC_SLICE < ACCUMULATORS:
76                $for C in range(CHANNEL_TILE):
77                  vacc${C}p${A} = vacc${C}p${A} + vacc${C}p${A + ACC_SLICE};
78            $ACC_SLICE *= 2
79
80        $if ACTIVATION == "MINMAX":
81          $for C in range(CHANNEL_TILE):
82            float vacc${C} = ${MAX_F32}(vacc${C}p0, vmin);
83
84          $for C in range(CHANNEL_TILE):
85            vacc${C} = ${MIN_F32}(vacc${C}, vmax);
86
87          $for C in range(CHANNEL_TILE):
88            output[${C}] = vacc${C};
89        $else:
90          $for C in range(CHANNEL_TILE):
91            output[${C}] = vacc${C}p0;
92        output += ${CHANNEL_TILE};
93      }
94      for (; c >= 1; c -= 1) {
95        float vacc0p0 = *w++;
96
97        $for K in range(KERNEL_TILE):
98          const float vi${K} = *i${K}++;
99          const float vk${K} = w[${(K + 1) * CHANNEL_TILE - 1}];
100          $if 1 <= K < ACCUMULATORS:
101            float vacc0p${K} = vi${K} * vk${K};
102          $else:
103            vacc0p${K % ACCUMULATORS} += vi${K} * vk${K};
104
105        $if ACCUMULATORS > 1:
106          // Add up all accumulators to vacc${ABC[0:CHANNEL_TILE]}p0
107          $ACC_SLICE = 1
108          $while ACC_SLICE < ACCUMULATORS:
109            $for A in range(0, ACCUMULATORS, ACC_SLICE * 2):
110              $if A + ACC_SLICE < ACCUMULATORS:
111                vacc0p${A} = vacc0p${A} + vacc0p${A + ACC_SLICE};
112            $ACC_SLICE *= 2
113
114        $if ACTIVATION == "MINMAX":
115          float vacc0 = ${MAX_F32}(vacc0p0, vmin);
116          vacc0 = ${MIN_F32}(vacc0, vmax);
117          *output++ = vacc0;
118        $else:
119          *output++ = vacc0p0;
120      }
121    $else:
122      do {
123        float vacc0p0 = w[0];
124        $for K in range(KERNEL_TILE):
125
126          const float vi${K} = *i${K}++;
127          const float vk${K} = w[${K+1}];
128          $if 1 <= K < ACCUMULATORS:
129            float vacc0p${K} = vi${K} * vk${K};
130          $else:
131            vacc0p${K % ACCUMULATORS} += vi${K} * vk${K};
132
133        w += ${KERNEL_TILE + 1};
134
135        $ACC_STEP = 1
136        $while ACC_STEP < ACCUMULATORS:
137          $for A in range(0, ACCUMULATORS, ACC_STEP * 2):
138            $if A + ACC_STEP < ACCUMULATORS:
139              vacc0p${A} += vacc0p${A + ACC_STEP};
140          $ACC_STEP *= 2
141
142        $if ACTIVATION == "MINMAX":
143          float vacc0 = ${MAX_F32}(vacc0p0, vmin);
144          vacc0 = ${MIN_F32}(vacc0, vmax);
145          *output++ = vacc0;
146        $else:
147          *output++ = vacc0p0;
148      } while (--c != 0);
149
150    output = (float*) ((uintptr_t) output + output_increment);
151  } while (--output_width != 0);
152}
153