• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2019 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 
25 #pragma once
26 
27 #include <deque>
28 #include <functional>
29 #include <memory>
30 
31 #include "depthwise.hpp"
32 
33 namespace depthwise
34 {
35 
36 template <
37   unsigned int OutputTileRows, unsigned int OutputTileCols,
38   unsigned int KernelRows, unsigned int KernelCols,
39   unsigned int StrideRows, unsigned int StrideCols,
40   typename TIn, typename TBias, typename TOut
41 >
42 class DilatedDepthwiseConvolution : public IDepthwiseConvolution
43 {
44   public:
45     /** Create a new dilated depthwise convolution engine.
46      */
47     DilatedDepthwiseConvolution(
48       int n_batches, int n_input_rows, int n_input_cols, int n_channels,
49       int dilation_factor,
50       nck::ActivationFunction activation,
51       unsigned int padding_top,
52       unsigned int padding_left,
53       unsigned int padding_bottom,
54       unsigned int padding_right
55     );
56 
57     /** Create a new dilated depthwise convolution engine.
58      */
59     DilatedDepthwiseConvolution(
60       int n_batches, int n_input_rows, int n_input_cols, int n_channels,
61       int dilation_factor, int n_output_rows, int n_output_cols,
62       nck::ActivationFunction activation,
63       unsigned int padding_top,
64       unsigned int padding_left,
65       unsigned int padding_bottom,
66       unsigned int padding_right
67     );
68 
69     // Cannot copy or move a DilatedDepthwiseConvolution.
70     DilatedDepthwiseConvolution(DilatedDepthwiseConvolution&) = delete;
71     DilatedDepthwiseConvolution operator=(DilatedDepthwiseConvolution&) = delete;
72 
73     /* Set input tensor and stride. */
74     void set_input(const void *inptr) override;
75     void set_input(const void *inptr, int column_stride) override;
76     void set_input(const void *inptr, int row_stride, int column_stride) override;
77     void set_input(const void *inptr, int batch_stride, int row_stride, int column_stride) override;
78 
79     /* Set output tensor and stride. */
80     void set_output(void *outptr) override;
81     void set_output(void *outptr, int column_stride) override;
82     void set_output(void *outptr, int row_stride, int column_stride) override;
83     void set_output(void *outptr, int batch_stride, int row_stride, int column_stride) override;
84 
85     static int get_output_size(
86       int dim_size,
87       unsigned int padding_before,
88       unsigned int padding_after,
89       int dilation_factor
90     );
91 
92     int output_size(
93       int dim_size, unsigned int padding_before, unsigned int padding_after
94     ) const override;
95 
96     /* Weights and biases are re-ordered to improve memory access patterns. Use
97      * these methods to determine the size of the re-pack buffer and to set the
98      * address (and implicitly reorder the weights and biases into) the buffer.
99      */
100     size_t get_packed_params_size(void) const override;
101     void set_packed_params_buffer(void *) override;
102 
103     void pack_params(const void *weights, const void *biases=nullptr) const override;
104     void pack_params(void *buffer, const void *weights, const void *biases=nullptr) const override;
105     void pack_params(
106       void *buffer,
107       const void* weights,
108       unsigned int weight_row_stride,
109       unsigned int weight_col_stride,
110       const void *biases=nullptr
111     ) const override;
112 
113     /* Working space is used to pad tensors on the fly. Before running any
114      * inference check the amount of space required, allocate and provide a
115      * pointer to the convolution engine.
116      */
117     size_t get_working_space_size(unsigned int nthreads=1) const override;
118     void set_working_space(void *) override;
119 
120     unsigned int get_window(void) const override;
121     void run(unsigned int start, unsigned int stop, unsigned int threadid=0) override;
122 
123   protected:
124     /** Protected constructor which also accepts a function to construct a new
125      * subconvolution
126      */
127     DilatedDepthwiseConvolution(
128       int n_batches, int n_input_rows, int n_input_cols, int n_channels,
129       int dilation_factor, int n_output_rows, int n_output_cols,
130       nck::ActivationFunction activation,
131       unsigned int padding_top,
132       unsigned int padding_left,
133       unsigned int padding_bottom,
134       unsigned int padding_right,
135       std::function<IDepthwiseConvolution *(int, int, int, int, int, int, nck::ActivationFunction, unsigned int, unsigned int, unsigned int, unsigned int)> subconvfn
136     );
137 
138     const int _dilation_factor;
139     const int _n_input_rows, _n_input_cols, _n_channels;
140     const int _padding_top, _padding_left;
141     const int _n_output_rows, _n_output_cols;
142 
143     /* Dilated depthwise convolution is performed through repeated calls to
144      * non-dilated convolutions. If the dilation factor is $n$, then we perform
145      * $(n + 1)^2$ depthwise convolutions.
146      */
147     using BaseDepthwise = DepthwiseConvolution<
148       OutputTileRows, OutputTileCols,
149       KernelRows, KernelCols,
150       StrideRows, StrideCols,
151       TIn, TBias, TOut
152     >;
153     std::deque<std::deque<std::unique_ptr<IDepthwiseConvolution>>> _convs;
154 };
155 
156 }  // namespace depthwise
157