1 /* 2 * Copyright (c) 2017-2020 Arm Limited. 3 * 4 * SPDX-License-Identifier: MIT 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to 8 * deal in the Software without restriction, including without limitation the 9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 10 * sell copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in all 14 * copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 */ 24 #ifndef ARM_COMPUTE_NEFULLYCONNECTEDLAYER_H 25 #define ARM_COMPUTE_NEFULLYCONNECTEDLAYER_H 26 27 #include "arm_compute/runtime/IFunction.h" 28 29 #include "arm_compute/runtime/MemoryGroup.h" 30 #include "arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h" 31 #include "arm_compute/runtime/NEON/functions/NEFlattenLayer.h" 32 #include "arm_compute/runtime/NEON/functions/NEGEMM.h" 33 #include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h" 34 #include "arm_compute/runtime/Tensor.h" 35 36 namespace arm_compute 37 { 38 class NEFlattenLayerKernel; 39 40 /** Basic function to reshape the weights of Fully Connected layer with NEON. This function calls the following kernels: 41 * 42 * @note The fully connected layer accepts "weights" tensors only with 2 dimensions. 43 */ 44 class NEFullyConnectedLayerReshapeWeights : public INESimpleFunctionNoBorder 45 { 46 public: 47 /** Constructor */ 48 NEFullyConnectedLayerReshapeWeights() = default; 49 /** Prevent instances of this class from being copied (As this class contains pointers) */ 50 NEFullyConnectedLayerReshapeWeights(const NEFullyConnectedLayerReshapeWeights &) = delete; 51 /** Prevent instances of this class from being copied (As this class contains pointers) */ 52 NEFullyConnectedLayerReshapeWeights &operator=(const NEFullyConnectedLayerReshapeWeights &) = delete; 53 /** Prevent instances of this class from being moved (As this class contains non movable objects) */ 54 NEFullyConnectedLayerReshapeWeights(NEFullyConnectedLayerReshapeWeights &&) = delete; 55 /** Prevent instances of this class from being moved (As this class contains non movable objects) */ 56 NEFullyConnectedLayerReshapeWeights &operator=(NEFullyConnectedLayerReshapeWeights &&) = delete; 57 /** Default destructor */ 58 ~NEFullyConnectedLayerReshapeWeights() = default; 59 /** Set the input and output tensors. 60 * 61 * @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. 62 * @param[out] output Destination tensor. Data type supported: Same as @p input. 63 */ 64 void configure(const ITensor *input, ITensor *output); 65 /** Static function to check if given info will lead to a valid configuration of @ref NEFullyConnectedLayerReshapeWeights 66 * 67 * @param[in] input Weights tensor info. The weights must be 2 dimensional. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. 68 * @param[in] output Destination tensor info. Data type supported: Same as @p input. 69 * 70 * @return a status 71 */ 72 static Status validate(const ITensorInfo *input, const ITensorInfo *output); 73 }; 74 75 namespace weights_transformations 76 { 77 /** Basic function to manage the reshape weights generated from @ref NEFullyConnectedLayerReshapeWeights */ 78 class NEFullyConnectedLayerReshapeWeightsManaged : public ITransformWeights 79 { 80 public: run()81 void run() override 82 { 83 _output.allocator()->allocate(); 84 _func.run(); 85 _reshape_run = true; 86 } 87 release()88 void release() override 89 { 90 _output.allocator()->free(); 91 } 92 get_weights()93 ITensor *get_weights() override 94 { 95 return &_output; 96 } 97 uid()98 uint32_t uid() override 99 { 100 return _uid; 101 } 102 configure(const ITensor * input)103 void configure(const ITensor *input) 104 { 105 _func.configure(input, &_output); 106 } 107 108 private: 109 static constexpr uint32_t _uid = 0x0; 110 Tensor _output{}; 111 NEFullyConnectedLayerReshapeWeights _func{}; 112 }; 113 } // namespace weights_transformations 114 115 /** Basic function to compute a Fully Connected layer on NEON. This function calls the following NEON kernels: 116 * -# @ref NEIm2ColKernel (called when the input comes from a convolutional layer) 117 * -# @ref NEFullyConnectedLayerReshapeWeights (if @p are_weights_reshaped is set to false and transpose_weights is set to true ) (called once) 118 * -# @ref NEGEMMMatrixMultiplyKernel or @ref NEGEMMLowpMatrixMultiplyCore (if quantized asymmetric) 119 * -# @ref NEGEMMMatrixAdditionKernel or @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint (if quantized asymmetric) (if @p biases is not equal to nullptr) 120 * 121 * @note The fully connected layer accepts "weights" tensors only with 2 dimensions. 122 */ 123 class NEFullyConnectedLayer : public IFunction 124 { 125 public: 126 /** Constructor */ 127 NEFullyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr, IWeightsManager *weights_manager = nullptr); 128 /** Prevent instances of this class from being copied (As this class contains pointers) */ 129 NEFullyConnectedLayer(const NEFullyConnectedLayer &) = delete; 130 /** Prevent instances of this class from being moved (As this class contains pointers) */ 131 NEFullyConnectedLayer(NEFullyConnectedLayer &&) = delete; 132 /** Prevent instances of this class from being copied (As this class contains pointers) */ 133 NEFullyConnectedLayer &operator=(const NEFullyConnectedLayer &) = delete; 134 /** Prevent instances of this class from being moved (As this class contains pointers) */ 135 NEFullyConnectedLayer &operator=(NEFullyConnectedLayer &&) = delete; 136 /** Default destructor */ 137 ~NEFullyConnectedLayer(); 138 /** Set the input and output tensors. 139 * 140 * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. 141 * @param[in] weights Weights tensor. The weights must be 2 dimensional. 142 * If this function is called after a Convolution Layer, the (transposed) weights will have as many rows as the product of the first 3 input's dimensions. 143 * If it is called after another FullyConnected Layer, the (transposed) weights will have as many rows as the input's first dimension. 144 * Data type supported: Same as @p input. 145 * @param[in] biases Bias tensor. Can be nullptr. Data type supported: Same as @p weights, S32 if @p weights is QASYMM8/QASYMM8_SIGNED. 146 * @param[out] output Destination tensor. Its shape should be equal to the output of a matrix multiplication between: 147 * - The output of im2col on the input and the (transposed) 2D weights, if the function is called after a Convolution Layer 148 * - The input tensor and the (transposed) 2D weights, if the function is called after another FullyConnected Layer. 149 * Data type supported: Same as @p input. 150 * @param[in] fc_info (Optional) Fully connected layer additional info 151 */ 152 void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, 153 FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo()); 154 /** Static function to check if given info will lead to a valid configuration of @ref NEFullyConnectedLayer 155 * 156 * @param[in] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. 157 * @param[in] weights Weights tensor info. The weights must be 2 dimensional. 158 * If this function is called after a Convolution Layer, the (transposed) weights will have as many rows as the product of the first 3 input's dimensions. 159 * If it is called after another FullyConnected Layer, the (transposed) weights will have as many rows as the input's first dimension. 160 * Data type supported: Same as @p input. 161 * @param[in] biases Bias tensor. Can be nullptr. Data type supported: Same as @p weights, S32 if @p weights is QASYMM8/QASYMM8_SIGNED. 162 * @param[in] output Destination tensor info. Its shape should be equal to the output of a matrix multiplication between: 163 * - The output of im2col on the input and the (transposed) 2D weights, if the function is called after a Convolution Layer 164 * - The input tensor and the (transposed) 2D weights, if the function is called after another FullyConnected Layer. 165 * Data type supported: Same as @p input. 166 * @param[in] fc_info (Optional) Fully connected layer additional info 167 * 168 * @return a status 169 */ 170 static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, 171 FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo()); 172 173 //Inherited methods override 174 void run() override; 175 void prepare() override; 176 177 private: 178 void configure_fc_fc(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const ActivationLayerInfo &act); 179 void configure_conv_fc(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const ActivationLayerInfo &act); 180 void configure_mm(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const ActivationLayerInfo &act); 181 182 MemoryGroup _memory_group; 183 IWeightsManager *_weights_manager; 184 std::unique_ptr<NEFlattenLayerKernel> _flatten_kernel; 185 NEConvertFullyConnectedWeights _convert_weights; 186 weights_transformations::NEConvertFullyConnectedWeightsManaged _convert_weights_managed; 187 NEFullyConnectedLayerReshapeWeights _reshape_weights_function; 188 weights_transformations::NEFullyConnectedLayerReshapeWeightsManaged _reshape_weights_managed_function; 189 NEGEMM _mm_gemm; 190 NEGEMMLowpMatrixMultiplyCore _mm_gemmlowp; 191 Tensor _flatten_output; 192 Tensor _converted_weights_output; 193 Tensor _reshape_weights_output; 194 const ITensor *_original_weights; 195 bool _are_weights_converted; 196 bool _are_weights_reshaped; 197 bool _is_fc_after_conv; 198 bool _is_quantized_asymmetric; 199 bool _is_prepared; 200 }; 201 } // namespace arm_compute 202 #endif /* ARM_COMPUTE_NEFULLYCONNECTEDLAYER_H */ 203