1 /* 2 * Copyright (c) 2017-2019 Arm Limited. 3 * 4 * SPDX-License-Identifier: MIT 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to 8 * deal in the Software without restriction, including without limitation the 9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 10 * sell copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in all 14 * copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 */ 24 25 #pragma once 26 #include "arm_gemm_local.hpp" 27 #include "arm_gemm.hpp" 28 #include "winograd.hpp" 29 30 namespace winograd 31 { 32 33 34 class IWinogradConvolutionLayer 35 { 36 public: 37 virtual ~IWinogradConvolutionLayer() = default; 38 39 virtual unsigned int weight_transform_get_window(void) const = 0; 40 virtual void weight_transform_run(unsigned int start, unsigned int stop) = 0; 41 42 virtual IInputTransform& input_transform(void) = 0; // Expose the input transform 43 virtual IOutputTransform& output_transform(void) = 0; // Expose the output transform 44 virtual arm_gemm::IGemmCommon *gemm(void) = 0; // Expose the underlying GEMM 45 }; 46 47 /** Example of how to construct an ACL-like interface. 48 * 49 * Use `get_weight_storage_size`, `get_input_storage_size` and 50 * `get_output_storage_size` to allocate memory for the convolution engine. 51 * Then create a `WinogradConvolutionLayer`. 52 * 53 * Initialise the weights using `weights_transform.run(...)`. 54 * 55 * For each inference: 56 * 1. Transform the inputs to the Winograd domain using `input_transform.run(...)` 57 * 2. Perform a number of GEMMs using `gemms.run(...)` 58 * 3. Transform the output to the spatial domain using `output_transform.run(...)` 59 */ 60 template <int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols, 61 typename TIn, typename TInGEMM, typename TOutGEMM, typename TOut, 62 WinogradRoots Roots> 63 class WinogradConvolutionLayer : public IWinogradConvolutionLayer 64 { 65 public: 66 using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols, Roots>; 67 using WeightsTransform = typename WinogradBase::template WeightsTransform<TIn, TInGEMM>; 68 using InputTransform = typename WinogradBase::template InputTransform<TIn, TInGEMM>; 69 using WinogradConv = typename WinogradBase::template Convolution<TOut, TIn, TInGEMM, TOutGEMM>; 70 using OutputTransform = typename WinogradBase::template OutputTransform<TOutGEMM, TOut>; 71 72 private: 73 static constexpr int InnerTileRows = OutputTileRows + KernelRows - 1; 74 static constexpr int InnerTileCols = OutputTileCols + KernelCols - 1; 75 static constexpr int N_GEMMS = InnerTileRows * InnerTileCols; 76 77 const int _n_output_rows, _n_output_cols; 78 const int _kernel_matrix_stride, _kernel_matrix_row_stride; 79 const int _input_matrix_stride, _input_matrix_row_stride; 80 const int _output_matrix_stride, _output_matrix_row_stride; 81 const int _tile_rows, _tile_cols; 82 const int _m, _k, _n; 83 84 WeightsTransform weights_transform; /** Operator to transform weights to Winograd domain. */ 85 InputTransform _input_transform; /** Operator to transform input to Winograd domain. */ 86 const arm_gemm::GemmArgs gemm_args; 87 arm_gemm::UniqueGemmCommon<TInGEMM, TOutGEMM> gemms; /** Operator to perform multiple GEMMs. */ 88 OutputTransform _output_transform; /** Operator to transform output from Winograd domain. */ 89 90 public: 91 92 /** Determine how much memory (in units of TIn) to allocate for the 93 * transformed weights. 94 */ 95 static unsigned int get_weight_storage_size( 96 const int n_output_channels, /** Number of output feature maps. */ 97 const int n_input_channels /** Number of input feature maps. */ 98 ); 99 100 static unsigned int get_weight_stride( 101 const int n_output_channels, /** Number of output feature maps. */ 102 const int n_input_channels /** Number of input feature maps. */ 103 ); 104 105 static unsigned int get_weight_multi_stride( 106 const int n_output_channels, /** Number of output feature maps. */ 107 const int n_input_channels /** Number of input feature maps. */ 108 ); 109 110 /** Determine how much memory (in units of TIn) to allocate for the 111 * transformed input. 112 */ 113 static unsigned int get_input_storage_size( 114 const int n_batches, /** Number of batches in the input tensor. */ 115 const int n_channels, /** Number of feature maps in the input tensor. */ 116 const int n_rows, /** Number of rows in each feature map. */ 117 const int n_cols, /** Number of columns in each feature map. */ 118 const bool same_padding /** Use "SAME" padding, otherwise use "VALID". */ 119 ); 120 121 /** Get the row stride for the A matrix in the Winograd domain. */ 122 static unsigned int get_input_stride( 123 const int n_batches, /** Number of batches in the input tensor. */ 124 const int n_channels, /** Number of feature maps in the input tensor. */ 125 const int n_rows, /** Number of rows in each feature map. */ 126 const int n_cols, /** Number of columns in each feature map. */ 127 const bool same_padding /** Use "SAME" padding, otherwise use "VALID". */ 128 ); 129 130 /** Get the stride between A matrices in the Winograd domain. */ 131 static unsigned int get_input_multi_stride( 132 const int n_batches, /** Number of batches in the input tensor. */ 133 const int n_channels, /** Number of feature maps in the input tensor. */ 134 const int n_rows, /** Number of rows in each feature map. */ 135 const int n_cols, /** Number of columns in each feature map. */ 136 const bool same_padding /** Use "SAME" padding, otherwise use "VALID". */ 137 ); 138 139 /** Determine how much memory (in units of TOut) to allocate for the 140 * (Winograd domain) output. 141 */ 142 static unsigned int get_output_storage_size( 143 const int n_batches, /** Number of batches in the output tensor. */ 144 const int n_rows, /** Number of rows in each feature map of the input tensor. */ 145 const int n_cols, /** Number of columns in each feature map of the input tensor. */ 146 const int n_output_channels, /** Number of feature maps in the output tensor. */ 147 const bool same_padding /** Use "SAME" padding, otherwise use "VALID". */ 148 ); 149 150 static unsigned int get_output_stride( 151 const int n_batches, /** Number of batches in the output tensor. */ 152 const int n_rows, /** Number of rows in each feature map of the input tensor. */ 153 const int n_cols, /** Number of columns in each feature map of the input tensor. */ 154 const int n_output_channels, /** Number of feature maps in the output tensor. */ 155 const bool same_padding /** Use "SAME" padding, otherwise use "VALID". */ 156 ); 157 158 static unsigned int get_output_multi_stride( 159 const int n_batches, /** Number of batches in the output tensor. */ 160 const int n_rows, /** Number of rows in each feature map of the input tensor. */ 161 const int n_cols, /** Number of columns in each feature map of the input tensor. */ 162 const int n_output_channels, /** Number of feature maps in the output tensor. */ 163 const bool same_padding /** Use "SAME" padding, otherwise use "VALID". */ 164 ); 165 166 /** Get the shape (rows, cols) of a feature map of the output tensor. */ 167 static std::pair<int, int> get_output_feature_map_shape( 168 const int n_input_rows, /** Number of rows in the input feature map. */ 169 const int n_input_cols, /** Number of columns in the input feature map. */ 170 const bool same_padding /** Use "SAME" padding, otherwise use "VALID". */ 171 ); 172 173 /** Create a new Winograd convolution layer. 174 */ 175 WinogradConvolutionLayer( 176 const arm_gemm::CPUInfo &cpuinfo, /** Describes CPU properties. */ 177 const int n_threads, /** Maximum number of threads used to execute the convolution. */ 178 const int n_batches, /** Number of batches in the input and output tensors. */ 179 const int n_input_channels, /** Number of feature maps in a batch of the input tensor. */ 180 const int n_input_rows, /** Number of rows in a feature map of the input tensor. */ 181 const int n_input_cols, /** Number of columns in a feature map of the input tensor. */ 182 const int n_output_channels, /** Number of feature maps in the output tensor. */ 183 const bool same_padding, /** Use "SAME" padding, otherwise use "VALID". */ 184 const arm_gemm::Activation &activation, 185 const TIn* const weights, /** Pointer to weight tensor in spatial domain. Must be ordered as "Height x Rows x Input Feature Maps x Output Feature Maps. */ 186 TInGEMM* const weights_storage, /** Pointer to storage for weight tensor in the Winograd domain. Must be at least the size returned by `get_weight_storage_size`. */ 187 const TIn* const input, /** Pointer to NHWC ordered input tensor, in the spatial domain. */ 188 TInGEMM* const winograd_input, /** Pointer to working space for the input tensor in the Winograd domain. Must be at least the size returned by `get_input_storage_size`. */ 189 const TOut* const biases, /** Pointer to biases vector. Pass nullptr if no bias is provided. */ 190 TOut* const output, /** Pointer to NHWC ordered output tensor, in the spatial domain. */ 191 TOutGEMM* const winograd_output, /** Pointer to working space for the output tensor in the Winograd domain. Must be at least the size returned by `get_output_storage_size`. */ 192 const bool pretranspose_B=true, /** Hint that the B matrix can be pretransposed. */ 193 arm_gemm::GemmConfig *gemm_cfg=nullptr /** Pointer to GEMM configuration. */ 194 ); 195 196 /* Utility methods for interacting with the layer. */ 197 unsigned int weight_transform_get_window(void) const; 198 void weight_transform_run(const unsigned int start, const unsigned int stop); 199 200 IInputTransform& input_transform(void); 201 IOutputTransform& output_transform(void); 202 203 /* Get a pointer to the GEMM underlying the Winograd transform. */ 204 arm_gemm::IGemmCommon *gemm(void); 205 }; 206 207 } 208