• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2022 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 
25 #pragma once
26 
#include "src/cpu/kernels/assembly/arm_gemm.hpp"
#include <cstddef>
#include <memory>
#include <string>
29 
30 namespace arm_conv
31 {
/* Extent of a two-dimensional region, stored as a row count followed by a
 * column count. Plain aggregate: the fields are not default-initialised.
 */
struct Shape2D
{
    unsigned int rows;
    unsigned int cols;
};
36 
37 struct ConvolutionArgs
38 {
39     unsigned int         n_batches;
40     Shape2D              input_shape;
41     unsigned int         n_input_channels;
42     unsigned int         pad_top, pad_left;
43     Shape2D              output_shape;
44     unsigned int         n_output_channels;
45     Shape2D              kernel_shape;
46     arm_gemm::Activation activation;
47 
ConvolutionArgsarm_conv::ConvolutionArgs48     ConvolutionArgs(
49         unsigned int   n_batches,
50         const Shape2D &input_shape,
51         unsigned int   n_input_channels,
52         unsigned int pad_top, unsigned int pad_left,
53         const Shape2D              &output_shape,
54         unsigned int                n_output_channels,
55         const Shape2D               kernel_shape,
56         const arm_gemm::Activation &activation = {})
57         : n_batches(n_batches), input_shape(input_shape), n_input_channels(n_input_channels), pad_top(pad_top), pad_left(pad_left), output_shape(output_shape), n_output_channels(n_output_channels),
58           kernel_shape(kernel_shape), activation(activation)
59     {
60     }
61 };
62 
63 namespace winograd
64 {
/* Constraints applied when selecting a Winograd implementation.
 *
 * A default-constructed config has zero output dimensions and empty filter
 * strings.
 * NOTE(review): presumably zero / empty means "unconstrained" - confirm
 * against the implementation-selection code.
 */
struct WinogradConfig
{
    unsigned int output_rows{ 0 };
    unsigned int output_cols{ 0 };
    std::string  input_transform_filter{};
    std::string  output_transform_filter{};
    std::string  weight_transform_filter{};
};
74 
/* Describes the (suggested) memory layout within the Winograd domain.
 *
 * Holds the byte size of the weight, input and output matrices followed by
 * the leading dimensions ("ld") for each of them. Plain aggregate: no field
 * has a default initialiser.
 */
struct WinogradDomainSpec
{
    // Matrix sizes, in bytes.
    size_t weight_matrix_size_bytes;
    size_t input_matrix_size_bytes;
    size_t output_matrix_size_bytes;

    // Leading dimensions of the weight matrices.
    size_t weight_ld_matrix;
    size_t weight_ld_row;

    // Leading dimensions of the input matrices.
    size_t input_ld_batch;
    size_t input_ld_matrix;
    size_t input_ld_row;

    // Leading dimensions of the output matrices.
    size_t output_ld_batch;
    size_t output_ld_matrix;
    size_t output_ld_row;
};
85 
/* Root interface for the Winograd transform interfaces: every transform can
 * report its name.
 */
class ITransformCommon
{
public:
    // Virtual so that implementations can be destroyed through a base pointer.
    virtual ~ITransformCommon() = default;

    // Returns the name of the transform; supplied by the implementation.
    virtual const std::string &get_name() const = 0;
};
94 
95 namespace weight_transform
96 {
97 class ITransform : public ITransformCommon
98 {
99 public:
100     ~ITransform() = default;
101 
102     virtual unsigned int get_kernel_rows(void) const = 0;
103     virtual unsigned int get_kernel_cols(void) const = 0;
104 
105     virtual unsigned int get_transformed_tile_rows(void) const = 0;
106     virtual unsigned int get_transformed_tile_cols(void) const = 0;
107 
execute(const ConvolutionArgs & args,const void * inptr,size_t ld_in_row,size_t ld_in_col,size_t ld_input_channel,void * outptr,const WinogradDomainSpec & wds,unsigned int thread_id,unsigned int n_threads) const108     void execute(
109         const ConvolutionArgs &args,
110         const void *inptr, size_t ld_in_row, size_t ld_in_col, size_t ld_input_channel,
111         void *outptr, const WinogradDomainSpec &wds,
112         unsigned int thread_id, unsigned int n_threads) const
113     {
114         this->execute(
115             args, inptr, ld_in_row, ld_in_col, ld_input_channel,
116             outptr, wds.weight_ld_matrix, wds.weight_ld_row,
117             thread_id, n_threads);
118     }
119 
120     virtual void execute(
121         const ConvolutionArgs &args,
122         const void *inptr, size_t ld_in_row, size_t ld_in_col, size_t ld_input_channel,
123         void *outptr, size_t ld_out_matrix, size_t ld_out_row,
124         unsigned int thread_id, unsigned int n_threads) const = 0;
125 };
126 
127 } // namespace weight_transform
128 
129 namespace input_transform
130 {
131 class ITransform : public ITransformCommon
132 {
133 public:
134     ~ITransform() = default;
135 
136     virtual unsigned int get_input_rows(void) const = 0;
137     virtual unsigned int get_input_cols(void) const = 0;
138 
139     virtual size_t get_working_space_size(
140         const ConvolutionArgs &args,
141         unsigned int           n_threads) const = 0;
142 
execute(const ConvolutionArgs & args,const void * inptr,size_t ld_in_batch,size_t ld_in_row,size_t ld_in_col,void * outptr,const WinogradDomainSpec & wds,void * working_space,unsigned int thread_id,unsigned int n_threads) const143     void execute(
144         const ConvolutionArgs &args,
145         const void *inptr, size_t ld_in_batch, size_t ld_in_row, size_t ld_in_col,
146         void *outptr, const WinogradDomainSpec &wds,
147         void *working_space, unsigned int thread_id, unsigned int n_threads) const
148     {
149         this->execute(
150             args, inptr, ld_in_batch, ld_in_row, ld_in_col,
151             outptr, wds.input_ld_batch, wds.input_ld_matrix, wds.input_ld_row,
152             working_space, thread_id, n_threads);
153     }
154 
155     virtual void execute(
156         const ConvolutionArgs &args,
157         const void *inptr, size_t ld_in_batch, size_t ld_in_row, size_t ld_in_col,
158         void *outptr, size_t ld_out_batch, size_t ld_out_matrix, size_t ld_out_row,
159         void *working_space, unsigned int thread_id, unsigned int n_threads) const = 0;
160 };
161 
162 } // namespace input_transform
163 
164 namespace output_transform
165 {
166 class ITransform : public ITransformCommon
167 {
168 public:
169     ~ITransform() = default;
170 
171     virtual unsigned int get_input_rows(void) const = 0;
172     virtual unsigned int get_input_cols(void) const = 0;
173 
174     virtual unsigned int get_output_rows(void) const = 0;
175     virtual unsigned int get_output_cols(void) const = 0;
176 
177     virtual unsigned int get_kernel_rows(void) const = 0;
178     virtual unsigned int get_kernel_cols(void) const = 0;
179 
180     virtual size_t get_working_space_size(
181         const ConvolutionArgs &args,
182         unsigned int           n_threads) const = 0;
183 
execute(const ConvolutionArgs & args,const void * inptr,const WinogradDomainSpec & wds,const void * bias,void * outptr,size_t ld_out_batch,size_t ld_out_row,size_t ld_out_col,void * working_space,unsigned int thread_id,unsigned int n_threads) const184     void execute(
185         const ConvolutionArgs &args,
186         const void *inptr, const WinogradDomainSpec &wds,
187         const void *bias,
188         void *outptr, size_t ld_out_batch, size_t ld_out_row, size_t ld_out_col,
189         void *working_space, unsigned int thread_id, unsigned int n_threads) const
190     {
191         this->execute(
192             args,
193             inptr, wds.output_ld_batch, wds.output_ld_matrix, wds.output_ld_row,
194             bias,
195             outptr, ld_out_batch, ld_out_row, ld_out_col,
196             working_space, thread_id, n_threads);
197     }
198 
199     virtual void execute(
200         const ConvolutionArgs &args,
201         const void *inptr, size_t ld_in_batch, size_t ld_in_matrix, size_t ld_in_row,
202         const void *bias,
203         void *outptr, size_t ld_out_batch, size_t ld_out_row, size_t ld_out_col,
204         void *working_space, unsigned int thread_id, unsigned int n_threads) const = 0;
205 };
206 
207 } // namespace output_transform
208 
209 struct WinogradImpl
210 {
211     const output_transform::ITransform *output_transform = nullptr;
212     const weight_transform::ITransform *weight_transform = nullptr;
213     const input_transform::ITransform *input_transform  = nullptr;
214     std::unique_ptr<arm_gemm::GemmArgs> gemm_args;
215     WinogradDomainSpec                  winograd_spec;
216 };
217 
218 /* Get pointers to Winograd transforms for the given convolution problem.
219  *
220  * Assigns to the pointers in the `dest` struct and returns true or false to
221  * indicate whether the given problem can be executed or not.
222  */
223 template <typename TIn, typename TWeight = TIn, typename TOut = TIn, typename TWinogradIn = TIn, typename TWinogradOut = TOut>
224 bool get_implementation(
225     WinogradImpl &dest, // Destination for the selected implementation
226     const CPUInfo *,
227     const ConvolutionArgs &,
228     int  max_threads,
229     bool fast_mode,
230     const WinogradConfig *,
231     const arm_gemm::GemmConfig *);
232 
233 } // namespace winograd
234 } // namespace arm_conv
235