/*
 * Copyright (c) 2018-2020 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/runtime/CL/functions/CLConcatenateLayer.h"

#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "src/core/CL/kernels/CLDepthConcatenateLayerKernel.h"
#include "src/core/CL/kernels/CLHeightConcatenateLayerKernel.h"
#include "src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h"
#include "src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h"
#include "src/core/CL/kernels/CLWidthConcatenateLayerKernel.h"

#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "src/core/CL/kernels/CLBatchConcatenateLayerKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "support/MemorySupport.h"

namespace arm_compute
{
namespace experimental
{
CLConcatenation::CLConcatenation()
    : _concat_kernels(),
      _num_inputs(0),
      _axis(Window::DimX)
{
}

void CLConcatenation::configure(const CLCompileContext &compile_context, const std::vector<ITensorInfo *> &inputs_vector, ITensorInfo *output, size_t axis)
{
    ARM_COMPUTE_ERROR_ON(output == nullptr);
    _axis       = axis;
    _num_inputs = inputs_vector.size();

    TensorShape                      output_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, _axis);
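    // Gather the inputs as const pointers (asserting that none of them is null) for the validate() call below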
    std::vector<const ITensorInfo *> const_inputs_vector(inputs_vector.size());
    std::transform(inputs_vector.begin(), inputs_vector.end(), const_inputs_vector.begin(), [](ITensorInfo * t)
    {
        ARM_COMPUTE_ERROR_ON_NULLPTR(t);
        return t;
    });

    // Output auto initialization if not yet initialized
    auto_init_if_empty(*output, output_shape, 1, inputs_vector[0]->data_type());
    ARM_COMPUTE_ERROR_THROW_ON(CLConcatenateLayer::validate(const_inputs_vector, output, axis));

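    // Pick the kernels for the requested axis: on the width (x) axis, 2 or 4 inputs map to a single fused kernel,
    // while every other case configures one kernel per input, each writing at a running offset into the output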
    unsigned int offset = 0;
    switch(_axis)
    {
        case Window::DimX:
        {
            switch(_num_inputs)
            {
                case 2:
                {
                    // Configure WidthConcatenate2Tensors kernel
                    auto kernel = support::cpp14::make_unique<CLWidthConcatenate2TensorsKernel>();
                    kernel->configure(compile_context, inputs_vector.at(0), inputs_vector.at(1), output);
                    _concat_kernels.emplace_back(std::move(kernel));
                    break;
                }
                case 4:
                {
                    // Configure WidthConcatenate4Tensors kernel
                    auto kernel = support::cpp14::make_unique<CLWidthConcatenate4TensorsKernel>();
                    kernel->configure(compile_context, inputs_vector.at(0), inputs_vector.at(1), inputs_vector.at(2), inputs_vector.at(3), output);
                    _concat_kernels.emplace_back(std::move(kernel));
                    break;
                }
                default:
                {
                    // Configure generic case WidthConcatenate kernels
                    for(unsigned int i = 0; i < _num_inputs; ++i)
                    {
                        auto kernel = support::cpp14::make_unique<CLWidthConcatenateLayerKernel>();
                        kernel->configure(compile_context, inputs_vector.at(i), offset, output);
                        offset += inputs_vector.at(i)->dimension(_axis);
                        _concat_kernels.emplace_back(std::move(kernel));
                    }
                    break;
                }
            }
            break;
        }
        case Window::DimY:
        {
            for(unsigned int i = 0; i < _num_inputs; ++i)
            {
                auto kernel = support::cpp14::make_unique<CLHeightConcatenateLayerKernel>();
                kernel->configure(compile_context, inputs_vector.at(i), offset, output);
                offset += inputs_vector.at(i)->dimension(_axis);
                _concat_kernels.emplace_back(std::move(kernel));
            }
            break;
        }
        case Window::DimZ:
        {
            for(unsigned int i = 0; i < _num_inputs; ++i)
            {
                auto kernel = support::cpp14::make_unique<CLDepthConcatenateLayerKernel>();
                kernel->configure(compile_context, inputs_vector.at(i), offset, output);
                offset += inputs_vector.at(i)->dimension(_axis);
                _concat_kernels.emplace_back(std::move(kernel));
            }
            break;
        }
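        // Axis 3 is the batch dimension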
        case 3:
        {
            for(unsigned int i = 0; i < _num_inputs; ++i)
            {
                auto kernel = support::cpp14::make_unique<CLBatchConcatenateLayerKernel>();
                kernel->configure(compile_context, inputs_vector.at(i), offset, output);
                offset += inputs_vector.at(i)->dimension(_axis);
                _concat_kernels.emplace_back(std::move(kernel));
            }
            break;
        }
        default:
            ARM_COMPUTE_ERROR("Axis not supported");
    }
}

Status CLConcatenation::validate(const std::vector<const ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis)
{
    ARM_COMPUTE_RETURN_ERROR_ON(output == nullptr);
    const unsigned int num_inputs = inputs_vector.size();

    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output);
    ARM_COMPUTE_RETURN_ERROR_ON(num_inputs < 2);

    unsigned int offset = 0;
    switch(axis)
    {
        case Window::DimX:
        {
            switch(num_inputs)
            {
                case 2:
                    // Validate WidthConcatenate2Tensors kernels if there are 2 inputs
                    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(inputs_vector[0], inputs_vector[1]);
                    ARM_COMPUTE_RETURN_ON_ERROR(CLWidthConcatenate2TensorsKernel::validate(inputs_vector[0], inputs_vector[1], output));
                    break;
                case 4:
                    // Validate WidthConcatenate4Tensors kernels if there are 4 inputs
                    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(inputs_vector[0], inputs_vector[1], inputs_vector[2], inputs_vector[3]);
                    ARM_COMPUTE_RETURN_ON_ERROR(CLWidthConcatenate4TensorsKernel::validate(inputs_vector[0], inputs_vector[1], inputs_vector[2], inputs_vector[3], output));
                    break;
                default:
                    // Validate generic case of WidthConcatenate kernel
                    for(const auto &input : inputs_vector)
                    {
                        ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
                        ARM_COMPUTE_RETURN_ON_ERROR(CLWidthConcatenateLayerKernel::validate(input, offset, output));
                        offset += input->dimension(axis);
                    }
                    break;
            }
            break;
        }
        case Window::DimY:
        {
            for(const auto &input : inputs_vector)
            {
                ARM_COMPUTE_RETURN_ON_ERROR(CLHeightConcatenateLayerKernel::validate(input, offset, output));
                offset += input->dimension(axis);
            }
            break;
        }
        case Window::DimZ:
        {
            for(const auto &input : inputs_vector)
            {
                ARM_COMPUTE_RETURN_ON_ERROR(CLDepthConcatenateLayerKernel::validate(input, offset, output));
                offset += input->dimension(axis);
            }
            break;
        }
        case 3:
        {
            for(const auto &input : inputs_vector)
            {
                ARM_COMPUTE_RETURN_ON_ERROR(CLBatchConcatenateLayerKernel::validate(input, offset, output));
                offset += input->dimension(axis);
            }
            break;
        }
        default:
            ARM_COMPUTE_ERROR("Axis not supported");
    }

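    // If the output is already initialized, check that its total size matches the concatenated shape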
    if(output->total_size() != 0)
    {
        TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, axis);
        ARM_COMPUTE_RETURN_ERROR_ON(output_shape.total_size() != output->tensor_shape().total_size());
    }

    return Status{};
}

void CLConcatenation::run(ITensorPack &tensors)
{
    if(tensors.empty())
    {
        ARM_COMPUTE_ERROR("No inputs provided");
    }

    if(static_cast<int>(tensors.size()) - 1 != static_cast<int>(_num_inputs))
    {
        ARM_COMPUTE_ERROR("Configured with different number of inputs");
    }

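    // The fused width kernels (2 or 4 inputs) consume the whole tensor pack in a single enqueue;
    // otherwise each per-input kernel is enqueued with its own pack holding the matching source and the shared destination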
    if(_axis == Window::DimX && (_num_inputs == 2 || _num_inputs == 4))
    {
        ARM_COMPUTE_ERROR_ON(_concat_kernels.empty());
        CLScheduler::get().enqueue_op(*_concat_kernels.at(0), tensors, true);
    }
    else
    {
        int i = 0;
        for(auto &k : _concat_kernels)
        {
            ITensorPack pack;
            pack.add_tensor(TensorType::ACL_SRC, tensors.get_const_tensor(ACL_SRC_VEC + i));
            pack.add_tensor(TensorType::ACL_DST, tensors.get_tensor(ACL_DST));
            CLScheduler::get().enqueue_op(*k, pack, true);
            ++i;
        }
    }
}
} // namespace experimental

struct CLConcatenateLayer::Impl
{
    std::vector<const ICLTensor *>                 srcs{};
    ICLTensor                                     *dst{ nullptr };
    unsigned int                                   num_inputs{ 0 };
    unsigned int                                   axis{ 0 };
    std::unique_ptr<experimental::CLConcatenation> op{ nullptr };
};

CLConcatenateLayer::CLConcatenateLayer()
    : _impl(support::cpp14::make_unique<Impl>())
{
}

CLConcatenateLayer::CLConcatenateLayer(CLConcatenateLayer &&) = default;

CLConcatenateLayer &CLConcatenateLayer::operator=(CLConcatenateLayer &&) = default;

CLConcatenateLayer::~CLConcatenateLayer() = default;

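// A minimal usage sketch of the function-level API (tensor names, shapes and the CLScheduler setup are
// illustrative assumptions, not taken from this file):
//
//   CLScheduler::get().default_init();
//
//   CLTensor src0, src1, dst;
//   src0.allocator()->init(TensorInfo(TensorShape(32U, 16U), 1, DataType::F32));
//   src1.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::F32));
//
//   CLConcatenateLayer             concat;
//   std::vector<const ICLTensor *> inputs = { &src0, &src1 };
//   concat.configure(inputs, &dst, 0); // concatenate along the width (x) axis
//
//   src0.allocator()->allocate();
//   src1.allocator()->allocate();
//   dst.allocator()->allocate();
//   concat.run();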
void CLConcatenateLayer::configure(std::vector<const ICLTensor *> &inputs_vector, ICLTensor *output, size_t axis)
{
    configure(CLKernelLibrary::get().get_compile_context(), inputs_vector, output, axis);
}

void CLConcatenateLayer::configure(const CLCompileContext &compile_context, std::vector<const ICLTensor *> &inputs_vector, ICLTensor *output, size_t axis)
{
    ARM_COMPUTE_ERROR_ON(output == nullptr);

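    // Keep the sources and destination so that run() can rebuild the tensor pack expected by the operator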
    _impl->srcs       = inputs_vector;
    _impl->dst        = output;
    _impl->axis       = axis;
    _impl->num_inputs = inputs_vector.size();
    _impl->op         = arm_compute::support::cpp14::make_unique<experimental::CLConcatenation>();

    std::vector<ITensorInfo *> inputs_vector_info;
    for(unsigned int i = 0; i < inputs_vector.size(); ++i)
    {
        ARM_COMPUTE_ERROR_ON_NULLPTR(inputs_vector.at(i));
        inputs_vector_info.emplace_back(inputs_vector.at(i)->info());
    }
    _impl->op->configure(compile_context, inputs_vector_info, _impl->dst->info(), axis);
}

Status CLConcatenateLayer::validate(const std::vector<const ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis)
{
    return experimental::CLConcatenation::validate(inputs_vector, output, axis);
}

void CLConcatenateLayer::run()
{
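    // Gather every source tensor and the destination into the pack layout the operator expects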
    ITensorPack pack;
    for(unsigned i = 0; i < _impl->num_inputs; ++i)
    {
        pack.add_tensor(TensorType::ACL_SRC_VEC + i, _impl->srcs.at(i));
    }
    pack.add_tensor(TensorType::ACL_DST, _impl->dst);

    _impl->op->run(pack);
}
} // namespace arm_compute