1 /* 2 * Copyright (c) 2019 Arm Limited. 3 * 4 * SPDX-License-Identifier: MIT 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to 8 * deal in the Software without restriction, including without limitation the 9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 10 * sell copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in all 14 * copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 */ 24 25 #pragma once 26 27 #include <deque> 28 #include <functional> 29 #include <memory> 30 31 #include "depthwise.hpp" 32 33 namespace depthwise 34 { 35 36 template < 37 unsigned int OutputTileRows, unsigned int OutputTileCols, 38 unsigned int KernelRows, unsigned int KernelCols, 39 unsigned int StrideRows, unsigned int StrideCols, 40 typename TIn, typename TBias, typename TOut 41 > 42 class DilatedDepthwiseConvolution : public IDepthwiseConvolution 43 { 44 public: 45 /** Create a new dilated depthwise convolution engine. 46 */ 47 DilatedDepthwiseConvolution( 48 int n_batches, int n_input_rows, int n_input_cols, int n_channels, 49 int dilation_factor, 50 nck::ActivationFunction activation, 51 unsigned int padding_top, 52 unsigned int padding_left, 53 unsigned int padding_bottom, 54 unsigned int padding_right 55 ); 56 57 /** Create a new dilated depthwise convolution engine. 58 */ 59 DilatedDepthwiseConvolution( 60 int n_batches, int n_input_rows, int n_input_cols, int n_channels, 61 int dilation_factor, int n_output_rows, int n_output_cols, 62 nck::ActivationFunction activation, 63 unsigned int padding_top, 64 unsigned int padding_left, 65 unsigned int padding_bottom, 66 unsigned int padding_right 67 ); 68 69 // Cannot copy or move a DilatedDepthwiseConvolution. 70 DilatedDepthwiseConvolution(DilatedDepthwiseConvolution&) = delete; 71 DilatedDepthwiseConvolution operator=(DilatedDepthwiseConvolution&) = delete; 72 73 /* Set input tensor and stride. */ 74 void set_input(const void *inptr) override; 75 void set_input(const void *inptr, int column_stride) override; 76 void set_input(const void *inptr, int row_stride, int column_stride) override; 77 void set_input(const void *inptr, int batch_stride, int row_stride, int column_stride) override; 78 79 /* Set output tensor and stride. */ 80 void set_output(void *outptr) override; 81 void set_output(void *outptr, int column_stride) override; 82 void set_output(void *outptr, int row_stride, int column_stride) override; 83 void set_output(void *outptr, int batch_stride, int row_stride, int column_stride) override; 84 85 static int get_output_size( 86 int dim_size, 87 unsigned int padding_before, 88 unsigned int padding_after, 89 int dilation_factor 90 ); 91 92 int output_size( 93 int dim_size, unsigned int padding_before, unsigned int padding_after 94 ) const override; 95 96 /* Weights and biases are re-ordered to improve memory access patterns. Use 97 * these methods to determine the size of the re-pack buffer and to set the 98 * address (and implicitly reorder the weights and biases into) the buffer. 99 */ 100 size_t get_packed_params_size(void) const override; 101 void set_packed_params_buffer(void *) override; 102 103 void pack_params(const void *weights, const void *biases=nullptr) const override; 104 void pack_params(void *buffer, const void *weights, const void *biases=nullptr) const override; 105 void pack_params( 106 void *buffer, 107 const void* weights, 108 unsigned int weight_row_stride, 109 unsigned int weight_col_stride, 110 const void *biases=nullptr 111 ) const override; 112 113 /* Working space is used to pad tensors on the fly. Before running any 114 * inference check the amount of space required, allocate and provide a 115 * pointer to the convolution engine. 116 */ 117 size_t get_working_space_size(unsigned int nthreads=1) const override; 118 void set_working_space(void *) override; 119 120 unsigned int get_window(void) const override; 121 void run(unsigned int start, unsigned int stop, unsigned int threadid=0) override; 122 123 protected: 124 /** Protected constructor which also accepts a function to construct a new 125 * subconvolution 126 */ 127 DilatedDepthwiseConvolution( 128 int n_batches, int n_input_rows, int n_input_cols, int n_channels, 129 int dilation_factor, int n_output_rows, int n_output_cols, 130 nck::ActivationFunction activation, 131 unsigned int padding_top, 132 unsigned int padding_left, 133 unsigned int padding_bottom, 134 unsigned int padding_right, 135 std::function<IDepthwiseConvolution *(int, int, int, int, int, int, nck::ActivationFunction, unsigned int, unsigned int, unsigned int, unsigned int)> subconvfn 136 ); 137 138 const int _dilation_factor; 139 const int _n_input_rows, _n_input_cols, _n_channels; 140 const int _padding_top, _padding_left; 141 const int _n_output_rows, _n_output_cols; 142 143 /* Dilated depthwise convolution is performed through repeated calls to 144 * non-dilated convolutions. If the dilation factor is $n$, then we perform 145 * $(n + 1)^2$ depthwise convolutions. 146 */ 147 using BaseDepthwise = DepthwiseConvolution< 148 OutputTileRows, OutputTileCols, 149 KernelRows, KernelCols, 150 StrideRows, StrideCols, 151 TIn, TBias, TOut 152 >; 153 std::deque<std::deque<std::unique_ptr<IDepthwiseConvolution>>> _convs; 154 }; 155 156 } // namespace depthwise 157