• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2017-2019 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 
25 #pragma once
26 #include "arm_gemm_local.hpp"
27 #include "arm_gemm.hpp"
28 #include "winograd.hpp"
29 
30 namespace winograd
31 {
32 
33 
34 class IWinogradConvolutionLayer
35 {
36   public:
37     virtual ~IWinogradConvolutionLayer() = default;
38 
39     virtual unsigned int weight_transform_get_window(void) const = 0;
40     virtual void weight_transform_run(unsigned int start, unsigned int stop) = 0;
41 
42     virtual IInputTransform& input_transform(void) = 0; // Expose the input transform
43     virtual IOutputTransform& output_transform(void) = 0;  // Expose the output transform
44     virtual arm_gemm::IGemmCommon *gemm(void) = 0;  // Expose the underlying GEMM
45 };
46 
47 /** Example of how to construct an ACL-like interface.
48  *
49  * Use `get_weight_storage_size`, `get_input_storage_size` and
50  * `get_output_storage_size` to allocate memory for the convolution engine.
51  * Then create a `WinogradConvolutionLayer`.
52  *
53  * Initialise the weights using `weights_transform.run(...)`.
54  *
55  * For each inference:
56  *   1. Transform the inputs to the Winograd domain using `input_transform.run(...)`
57  *   2. Perform a number of GEMMs using `gemms.run(...)`
58  *   3. Transform the output to the spatial domain using `output_transform.run(...)`
59  */
60 template <int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols,
61           typename TIn, typename TInGEMM, typename TOutGEMM, typename TOut,
62           WinogradRoots Roots>
63 class WinogradConvolutionLayer : public IWinogradConvolutionLayer
64 {
65   public:
66     using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols, Roots>;
67     using WeightsTransform = typename WinogradBase::template WeightsTransform<TIn, TInGEMM>;
68     using InputTransform = typename WinogradBase::template InputTransform<TIn, TInGEMM>;
69     using WinogradConv = typename WinogradBase::template Convolution<TOut, TIn, TInGEMM, TOutGEMM>;
70     using OutputTransform = typename WinogradBase::template OutputTransform<TOutGEMM, TOut>;
71 
72   private:
73     static constexpr int InnerTileRows = OutputTileRows + KernelRows - 1;
74     static constexpr int InnerTileCols = OutputTileCols + KernelCols - 1;
75     static constexpr int N_GEMMS = InnerTileRows * InnerTileCols;
76 
77     const int _n_output_rows, _n_output_cols;
78     const int _kernel_matrix_stride, _kernel_matrix_row_stride;
79     const int _input_matrix_stride, _input_matrix_row_stride;
80     const int _output_matrix_stride, _output_matrix_row_stride;
81     const int _tile_rows, _tile_cols;
82     const int _m, _k, _n;
83 
84     WeightsTransform weights_transform;  /** Operator to transform weights to Winograd domain. */
85     InputTransform _input_transform;      /** Operator to transform input to Winograd domain. */
86     const arm_gemm::GemmArgs gemm_args;
87     arm_gemm::UniqueGemmCommon<TInGEMM, TOutGEMM> gemms;    /** Operator to perform multiple GEMMs. */
88     OutputTransform _output_transform;    /** Operator to transform output from Winograd domain. */
89 
90   public:
91 
92     /** Determine how much memory (in units of TIn) to allocate for the
93      * transformed weights.
94      */
95     static unsigned int get_weight_storage_size(
96       const int n_output_channels,  /** Number of output feature maps. */
97       const int n_input_channels    /** Number of input feature maps. */
98     );
99 
100     static unsigned int get_weight_stride(
101       const int n_output_channels,  /** Number of output feature maps. */
102       const int n_input_channels    /** Number of input feature maps. */
103     );
104 
105     static unsigned int get_weight_multi_stride(
106       const int n_output_channels,  /** Number of output feature maps. */
107       const int n_input_channels    /** Number of input feature maps. */
108     );
109 
110     /** Determine how much memory (in units of TIn) to allocate for the
111      * transformed input.
112      */
113     static unsigned int get_input_storage_size(
114       const int n_batches,     /** Number of batches in the input tensor. */
115       const int n_channels,    /** Number of feature maps in the input tensor. */
116       const int n_rows,        /** Number of rows in each feature map. */
117       const int n_cols,        /** Number of columns in each feature map. */
118       const bool same_padding  /** Use "SAME" padding, otherwise use "VALID". */
119     );
120 
121     /** Get the row stride for the A matrix in the Winograd domain. */
122     static unsigned int get_input_stride(
123       const int n_batches,     /** Number of batches in the input tensor. */
124       const int n_channels,    /** Number of feature maps in the input tensor. */
125       const int n_rows,        /** Number of rows in each feature map. */
126       const int n_cols,        /** Number of columns in each feature map. */
127       const bool same_padding  /** Use "SAME" padding, otherwise use "VALID". */
128     );
129 
130     /** Get the stride between A matrices in the Winograd domain. */
131     static unsigned int get_input_multi_stride(
132       const int n_batches,     /** Number of batches in the input tensor. */
133       const int n_channels,    /** Number of feature maps in the input tensor. */
134       const int n_rows,        /** Number of rows in each feature map. */
135       const int n_cols,        /** Number of columns in each feature map. */
136       const bool same_padding  /** Use "SAME" padding, otherwise use "VALID". */
137     );
138 
139     /** Determine how much memory (in units of TOut) to allocate for the
140      * (Winograd domain) output.
141      */
142     static unsigned int get_output_storage_size(
143       const int n_batches,          /** Number of batches in the output tensor. */
144       const int n_rows,             /** Number of rows in each feature map of the input tensor. */
145       const int n_cols,             /** Number of columns in each feature map of the input tensor. */
146       const int n_output_channels,  /** Number of feature maps in the output tensor. */
147       const bool same_padding       /** Use "SAME" padding, otherwise use "VALID". */
148     );
149 
150     static unsigned int get_output_stride(
151       const int n_batches,          /** Number of batches in the output tensor. */
152       const int n_rows,             /** Number of rows in each feature map of the input tensor. */
153       const int n_cols,             /** Number of columns in each feature map of the input tensor. */
154       const int n_output_channels,  /** Number of feature maps in the output tensor. */
155       const bool same_padding       /** Use "SAME" padding, otherwise use "VALID". */
156     );
157 
158     static unsigned int get_output_multi_stride(
159       const int n_batches,          /** Number of batches in the output tensor. */
160       const int n_rows,             /** Number of rows in each feature map of the input tensor. */
161       const int n_cols,             /** Number of columns in each feature map of the input tensor. */
162       const int n_output_channels,  /** Number of feature maps in the output tensor. */
163       const bool same_padding       /** Use "SAME" padding, otherwise use "VALID". */
164     );
165 
166     /** Get the shape (rows, cols) of a feature map of the output tensor. */
167     static std::pair<int, int> get_output_feature_map_shape(
168       const int n_input_rows,  /** Number of rows in the input feature map. */
169       const int n_input_cols,  /** Number of columns in the input feature map. */
170       const bool same_padding  /** Use "SAME" padding, otherwise use "VALID". */
171     );
172 
173     /** Create a new Winograd convolution layer.
174      */
175     WinogradConvolutionLayer(
176       const arm_gemm::CPUInfo &cpuinfo,       /** Describes CPU properties. */
177       const int n_threads,          /** Maximum number of threads used to execute the convolution. */
178       const int n_batches,          /** Number of batches in the input and output tensors. */
179       const int n_input_channels,   /** Number of feature maps in a batch of the input tensor. */
180       const int n_input_rows,       /** Number of rows in a feature map of the input tensor. */
181       const int n_input_cols,       /** Number of columns in a feature map of the input tensor. */
182       const int n_output_channels,  /** Number of feature maps in the output tensor. */
183       const bool same_padding,      /** Use "SAME" padding, otherwise use "VALID". */
184       const arm_gemm::Activation &activation,
185       const TIn* const weights,     /** Pointer to weight tensor in spatial domain. Must be ordered as "Height x Rows x Input Feature Maps x Output Feature Maps. */
186       TInGEMM* const weights_storage,  /** Pointer to storage for weight tensor in the Winograd domain. Must be at least the size returned by `get_weight_storage_size`. */
187       const TIn* const input,       /** Pointer to NHWC ordered input tensor, in the spatial domain. */
188       TInGEMM* const winograd_input,    /** Pointer to working space for the input tensor in the Winograd domain. Must be at least the size returned by `get_input_storage_size`. */
189       const TOut* const biases,     /** Pointer to biases vector. Pass nullptr if no bias is provided. */
190       TOut* const output,           /** Pointer to NHWC ordered output tensor, in the spatial domain. */
191       TOutGEMM* const winograd_output,  /** Pointer to working space for the output tensor in the Winograd domain. Must be at least the size returned by `get_output_storage_size`. */
192       const bool pretranspose_B=true,         /** Hint that the B matrix can be pretransposed. */
193       arm_gemm::GemmConfig *gemm_cfg=nullptr  /** Pointer to GEMM configuration. */
194     );
195 
196     /* Utility methods for interacting with the layer. */
197     unsigned int weight_transform_get_window(void) const;
198     void weight_transform_run(const unsigned int start, const unsigned int stop);
199 
200     IInputTransform& input_transform(void);
201     IOutputTransform& output_transform(void);
202 
203     /* Get a pointer to the GEMM underlying the Winograd transform. */
204     arm_gemm::IGemmCommon *gemm(void);
205 };
206 
207 }
208