1 /* 2 * Copyright (c) 2022 Arm Limited. 3 * 4 * SPDX-License-Identifier: MIT 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to 8 * deal in the Software without restriction, including without limitation the 9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 10 * sell copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in all 14 * copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 */ 24 25 #pragma once 26 27 #include "src/cpu/kernels/assembly/arm_gemm.hpp" 28 #include <cstddef> 29 30 namespace arm_conv 31 { 32 struct Shape2D 33 { 34 unsigned int rows, cols; 35 }; 36 37 struct ConvolutionArgs 38 { 39 unsigned int n_batches; 40 Shape2D input_shape; 41 unsigned int n_input_channels; 42 unsigned int pad_top, pad_left; 43 Shape2D output_shape; 44 unsigned int n_output_channels; 45 Shape2D kernel_shape; 46 arm_gemm::Activation activation; 47 ConvolutionArgsarm_conv::ConvolutionArgs48 ConvolutionArgs( 49 unsigned int n_batches, 50 const Shape2D &input_shape, 51 unsigned int n_input_channels, 52 unsigned int pad_top, unsigned int pad_left, 53 const Shape2D &output_shape, 54 unsigned int n_output_channels, 55 const Shape2D kernel_shape, 56 const arm_gemm::Activation &activation = {}) 57 : n_batches(n_batches), input_shape(input_shape), n_input_channels(n_input_channels), pad_top(pad_top), pad_left(pad_left), output_shape(output_shape), n_output_channels(n_output_channels), 58 kernel_shape(kernel_shape), activation(activation) 59 { 60 } 61 }; 62 63 namespace winograd 64 { 65 /* Constrain the selected Winograd implementation. 66 */ 67 struct WinogradConfig 68 { 69 unsigned int output_rows = 0, output_cols = 0; 70 std::string input_transform_filter = ""; 71 std::string output_transform_filter = ""; 72 std::string weight_transform_filter = ""; 73 }; 74 75 /* Struct describing (suggested) memory layout within the Winograd domain. 76 */ 77 struct WinogradDomainSpec 78 { 79 size_t weight_matrix_size_bytes, input_matrix_size_bytes, output_matrix_size_bytes; 80 81 size_t weight_ld_matrix, weight_ld_row; 82 size_t input_ld_batch, input_ld_matrix, input_ld_row; 83 size_t output_ld_batch, output_ld_matrix, output_ld_row; 84 }; 85 86 class ITransformCommon 87 { 88 public: 89 virtual ~ITransformCommon() = default; 90 91 // Get the name of the transform 92 virtual const std::string &get_name(void) const = 0; 93 }; 94 95 namespace weight_transform 96 { 97 class ITransform : public ITransformCommon 98 { 99 public: 100 ~ITransform() = default; 101 102 virtual unsigned int get_kernel_rows(void) const = 0; 103 virtual unsigned int get_kernel_cols(void) const = 0; 104 105 virtual unsigned int get_transformed_tile_rows(void) const = 0; 106 virtual unsigned int get_transformed_tile_cols(void) const = 0; 107 execute(const ConvolutionArgs & args,const void * inptr,size_t ld_in_row,size_t ld_in_col,size_t ld_input_channel,void * outptr,const WinogradDomainSpec & wds,unsigned int thread_id,unsigned int n_threads) const108 void execute( 109 const ConvolutionArgs &args, 110 const void *inptr, size_t ld_in_row, size_t ld_in_col, size_t ld_input_channel, 111 void *outptr, const WinogradDomainSpec &wds, 112 unsigned int thread_id, unsigned int n_threads) const 113 { 114 this->execute( 115 args, inptr, ld_in_row, ld_in_col, ld_input_channel, 116 outptr, wds.weight_ld_matrix, wds.weight_ld_row, 117 thread_id, n_threads); 118 } 119 120 virtual void execute( 121 const ConvolutionArgs &args, 122 const void *inptr, size_t ld_in_row, size_t ld_in_col, size_t ld_input_channel, 123 void *outptr, size_t ld_out_matrix, size_t ld_out_row, 124 unsigned int thread_id, unsigned int n_threads) const = 0; 125 }; 126 127 } // namespace weight_transform 128 129 namespace input_transform 130 { 131 class ITransform : public ITransformCommon 132 { 133 public: 134 ~ITransform() = default; 135 136 virtual unsigned int get_input_rows(void) const = 0; 137 virtual unsigned int get_input_cols(void) const = 0; 138 139 virtual size_t get_working_space_size( 140 const ConvolutionArgs &args, 141 unsigned int n_threads) const = 0; 142 execute(const ConvolutionArgs & args,const void * inptr,size_t ld_in_batch,size_t ld_in_row,size_t ld_in_col,void * outptr,const WinogradDomainSpec & wds,void * working_space,unsigned int thread_id,unsigned int n_threads) const143 void execute( 144 const ConvolutionArgs &args, 145 const void *inptr, size_t ld_in_batch, size_t ld_in_row, size_t ld_in_col, 146 void *outptr, const WinogradDomainSpec &wds, 147 void *working_space, unsigned int thread_id, unsigned int n_threads) const 148 { 149 this->execute( 150 args, inptr, ld_in_batch, ld_in_row, ld_in_col, 151 outptr, wds.input_ld_batch, wds.input_ld_matrix, wds.input_ld_row, 152 working_space, thread_id, n_threads); 153 } 154 155 virtual void execute( 156 const ConvolutionArgs &args, 157 const void *inptr, size_t ld_in_batch, size_t ld_in_row, size_t ld_in_col, 158 void *outptr, size_t ld_out_batch, size_t ld_out_matrix, size_t ld_out_row, 159 void *working_space, unsigned int thread_id, unsigned int n_threads) const = 0; 160 }; 161 162 } // namespace input_transform 163 164 namespace output_transform 165 { 166 class ITransform : public ITransformCommon 167 { 168 public: 169 ~ITransform() = default; 170 171 virtual unsigned int get_input_rows(void) const = 0; 172 virtual unsigned int get_input_cols(void) const = 0; 173 174 virtual unsigned int get_output_rows(void) const = 0; 175 virtual unsigned int get_output_cols(void) const = 0; 176 177 virtual unsigned int get_kernel_rows(void) const = 0; 178 virtual unsigned int get_kernel_cols(void) const = 0; 179 180 virtual size_t get_working_space_size( 181 const ConvolutionArgs &args, 182 unsigned int n_threads) const = 0; 183 execute(const ConvolutionArgs & args,const void * inptr,const WinogradDomainSpec & wds,const void * bias,void * outptr,size_t ld_out_batch,size_t ld_out_row,size_t ld_out_col,void * working_space,unsigned int thread_id,unsigned int n_threads) const184 void execute( 185 const ConvolutionArgs &args, 186 const void *inptr, const WinogradDomainSpec &wds, 187 const void *bias, 188 void *outptr, size_t ld_out_batch, size_t ld_out_row, size_t ld_out_col, 189 void *working_space, unsigned int thread_id, unsigned int n_threads) const 190 { 191 this->execute( 192 args, 193 inptr, wds.output_ld_batch, wds.output_ld_matrix, wds.output_ld_row, 194 bias, 195 outptr, ld_out_batch, ld_out_row, ld_out_col, 196 working_space, thread_id, n_threads); 197 } 198 199 virtual void execute( 200 const ConvolutionArgs &args, 201 const void *inptr, size_t ld_in_batch, size_t ld_in_matrix, size_t ld_in_row, 202 const void *bias, 203 void *outptr, size_t ld_out_batch, size_t ld_out_row, size_t ld_out_col, 204 void *working_space, unsigned int thread_id, unsigned int n_threads) const = 0; 205 }; 206 207 } // namespace output_transform 208 209 struct WinogradImpl 210 { 211 const output_transform::ITransform *output_transform = nullptr; 212 const weight_transform::ITransform *weight_transform = nullptr; 213 const input_transform::ITransform *input_transform = nullptr; 214 std::unique_ptr<arm_gemm::GemmArgs> gemm_args; 215 WinogradDomainSpec winograd_spec; 216 }; 217 218 /* Get pointers to Winograd transforms for the given convolution problem. 219 * 220 * Assigns to the pointers in the `dest` struct and returns true or false to 221 * indicate whether the given problem can be executed or not. 222 */ 223 template <typename TIn, typename TWeight = TIn, typename TOut = TIn, typename TWinogradIn = TIn, typename TWinogradOut = TOut> 224 bool get_implementation( 225 WinogradImpl &dest, // Destination for the selected implementation 226 const CPUInfo *, 227 const ConvolutionArgs &, 228 int max_threads, 229 bool fast_mode, 230 const WinogradConfig *, 231 const arm_gemm::GemmConfig *); 232 233 } // namespace winograd 234 } // namespace arm_conv 235