• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2017-2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef ARM_COMPUTE_MISC_SHAPE_CALCULATOR_H
25 #define ARM_COMPUTE_MISC_SHAPE_CALCULATOR_H
26 
27 #include "arm_compute/core/Helpers.h"
28 #include "arm_compute/core/ITensorInfo.h"
29 #include "arm_compute/core/KernelDescriptors.h"
30 #include "arm_compute/core/Utils.h"
31 
32 #include "arm_compute/core/utils/helpers/tensor_transform.h"
33 
34 #include <cmath>
35 
36 namespace arm_compute
37 {
38 namespace misc
39 {
40 namespace shape_calculator
41 {
42 /** Calculate the output tensor shape for the reduce mean operation
43  *
44  * @param[in] input          Input tensor shape
45  * @param[in] reduction_axis Reduction axis
46  * @param[in] keep_dims      Flag to indicate if dimensions are kept
47  *
48  * @return the calculated shape
49  */
calculate_reduce_mean_shape(ITensorInfo * input,const Coordinates & reduction_axis,bool keep_dims)50 inline TensorShape calculate_reduce_mean_shape(ITensorInfo *input, const Coordinates &reduction_axis, bool keep_dims)
51 {
52     const int   reduction_ops = reduction_axis.num_dimensions();
53     Coordinates axis_local    = reduction_axis;
54     const int   input_dims    = input->num_dimensions();
55     convert_negative_axis(axis_local, input_dims);
56     TensorShape out_shape = input->tensor_shape();
57     // Configure reshape layer if we want to drop the dimensions
58     if(!keep_dims)
59     {
60         // We have to sort the reduction axis vectors in order for remove_dimension
61         // to work properly
62         std::sort(axis_local.begin(), axis_local.begin() + reduction_ops);
63         for(int i = 0; i < reduction_ops; ++i)
64         {
65             out_shape.remove_dimension(axis_local[i] - i);
66         }
67         return out_shape;
68     }
69     else
70     {
71         for(int i = 0; i < reduction_ops; ++i)
72         {
73             out_shape.set(axis_local[i], 1);
74         }
75         return out_shape;
76     }
77 }
78 /** Calculate the output tensor shape of a vector input given the convolution dimensions
79  *
80  * @param[in] input       Input tensor shape
81  * @param[in] conv_w      Convolution width
82  * @param[in] conv_h      Convolution height
83  * @param[in] data_layout Data layout
84  *
85  * @return the calculated shape
86  */
compute_vector_to_tensor_output_shape(const TensorShape & input,size_t conv_w,size_t conv_h,const DataLayout & data_layout)87 inline TensorShape compute_vector_to_tensor_output_shape(const TensorShape &input, size_t conv_w, size_t conv_h, const DataLayout &data_layout)
88 {
89     const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
90     const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
91     const size_t idx_c = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
92 
93     TensorShape output_shape(input);
94     output_shape.set(idx_w, conv_w);
95     output_shape.set(idx_h, conv_h);
96     output_shape.set(idx_c, input.x() / (conv_w * conv_h));
97 
98     return output_shape;
99 }
100 
101 /** Calculate the permuted shape of an input given a permutation vector
102  *
103  * @param[in] input Input tensor info
104  * @param[in] perm  Permutation vector
105  *
106  * @return the calculated shape
107  */
compute_permutation_output_shape(const ITensorInfo & input,const PermutationVector & perm)108 inline TensorShape compute_permutation_output_shape(const ITensorInfo &input, const PermutationVector &perm)
109 {
110     TensorShape output_shape = input.tensor_shape();
111     permute(output_shape, perm);
112     return output_shape;
113 }
114 
115 /** Calculate the output shape of the reorg layer given a stride
116  *
117  * @param[in] input  Input tensor info
118  * @param[in] stride Stride
119  *
120  * @return the calculated shape
121  */
compute_reorg_output_shape(const ITensorInfo & input,int32_t stride)122 inline TensorShape compute_reorg_output_shape(const ITensorInfo &input, int32_t stride)
123 {
124     const size_t idx_width   = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH);
125     const size_t idx_height  = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::HEIGHT);
126     const size_t idx_channel = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::CHANNEL);
127 
128     ARM_COMPUTE_ERROR_ON(stride <= 0);
129     ARM_COMPUTE_ERROR_ON_MSG((input.tensor_shape()[idx_width] % stride != 0), "The width of the input tensor must be a multiple of stride");
130     ARM_COMPUTE_ERROR_ON_MSG((input.tensor_shape()[idx_height] % stride != 0), "The height of the input tensor must be a multiple of stride");
131 
132     TensorShape output_shape{ input.tensor_shape() };
133 
134     output_shape.set(idx_width, output_shape[idx_width] / stride);
135     output_shape.set(idx_height, output_shape[idx_height] / stride);
136     output_shape.set(idx_channel, output_shape[idx_channel] * stride * stride);
137 
138     return output_shape;
139 }
140 
141 /** Calculate the reshaped shape of the weights
142  *
143  * @param[in] weights    Weights tensor info
144  * @param[in] has_bias   (Optional) Set to true if there is bias
145  * @param[in] num_groups (Optional) Number of groups
146  *
147  * @return the calculated shape of the reshaped weights
148  */
149 inline TensorShape compute_weights_reshaped_shape(const ITensorInfo &weights, bool has_bias = false, unsigned int num_groups = 1)
150 {
151     // Number of groups greater than one are only supported for NCHW data layout, and the number of weights must be a multiple of it.
152     ARM_COMPUTE_ERROR_ON(num_groups == 0);
153     ARM_COMPUTE_ERROR_ON(weights.data_layout() == DataLayout::NHWC && num_groups > 1);
154     ARM_COMPUTE_ERROR_ON((weights.dimension(3) % num_groups) != 0);
155 
156     // Calculate output shape
157     TensorShape weights_reshaped{ weights.tensor_shape() };
158     weights_reshaped.set(3, weights_reshaped[3] / num_groups);
159 
160     weights_reshaped.collapse(3);
161     const size_t tmp_dim = weights_reshaped[0];
162     weights_reshaped.set(0, weights_reshaped[1]);
163     weights_reshaped.set(1, tmp_dim + (has_bias ? 1 : 0));
164     if(weights.num_dimensions() < 5)
165     {
166         weights_reshaped.set(2, num_groups);
167     }
168 
169     return weights_reshaped;
170 }
171 
172 /** Calculate the Left Hand Side matrix reshaped shape
173  *
174  * @param[in] a                       Input tensor info
175  * @param[in] lhs_info                Left Hand Side matrix information
176  * @param[in] reinterpret_input_as_3d (Optional) Set to true if the input need to be interpreted as 3d
177  *
178  * @return the calculated shape
179  */
180 inline TensorShape compute_lhs_reshaped_shape(const ITensorInfo &a, const GEMMLHSMatrixInfo &lhs_info, bool reinterpret_input_as_3d = false)
181 {
182     ARM_COMPUTE_ERROR_ON(lhs_info.m0 == 0);
183     ARM_COMPUTE_ERROR_ON(lhs_info.k0 == 0);
184     ARM_COMPUTE_ERROR_ON(lhs_info.v0 == 0);
185 
186     // Input width/height
187     const unsigned int input_width  = a.dimension(0);
188     const unsigned int input_height = reinterpret_input_as_3d ? a.dimension(1) * a.dimension(2) : a.dimension(1);
189 
190     // Number of horizontal/vertical blocks in the input tensor
191     const unsigned int num_horiz_blocks = std::ceil(input_width / static_cast<float>(lhs_info.k0));
192     const unsigned int num_vert_blocks  = std::ceil(input_height / static_cast<float>(lhs_info.m0));
193 
194     // Block size
195     const unsigned int block_size = lhs_info.m0 * lhs_info.k0;
196 
197     // Output width/height
198     const unsigned int output_width  = block_size * num_horiz_blocks * lhs_info.v0;
199     const unsigned int output_height = std::ceil(num_vert_blocks / static_cast<float>(lhs_info.v0));
200 
201     TensorShape lhs_shape{ a.tensor_shape() };
202     lhs_shape.set(0, output_width);
203     lhs_shape.set(1, output_height);
204 
205     if((reinterpret_input_as_3d) && (lhs_shape.num_dimensions() > 2))
206     {
207         // When the data format is NHWC and the shapes are Nx1x1
208         // the tensor shape num_dimensions is automatically set to 1 instead of 3.
209         // To avoid failures by removing a dimension that doesn't exist
210         // check if the number of dimensions is greater than 2.
211         lhs_shape.remove_dimension(2);
212     }
213 
214     return lhs_shape;
215 }
216 
217 /** Calculate the Right Hand Side matrix reshaped shape
218  *
219  * @param[in] a        Input tensor info
220  * @param[in] rhs_info Right Hand Side matrix information
221  *
222  * @return the calculated shape
223  */
compute_rhs_reshaped_shape(const ITensorInfo & a,const GEMMRHSMatrixInfo & rhs_info)224 inline TensorShape compute_rhs_reshaped_shape(const ITensorInfo &a, const GEMMRHSMatrixInfo &rhs_info)
225 {
226     ARM_COMPUTE_ERROR_ON(rhs_info.n0 == 0);
227     ARM_COMPUTE_ERROR_ON(rhs_info.k0 == 0);
228     ARM_COMPUTE_ERROR_ON(rhs_info.h0 == 0);
229 
230     // Input width/height
231     const unsigned int input_width  = a.dimension(0);
232     const unsigned int input_height = a.dimension(1);
233 
234     // Number of horizontal/vertical blocks in the input tensor
235     const unsigned int num_horiz_blocks = std::ceil(input_width / static_cast<float>(rhs_info.n0));
236     const unsigned int num_vert_blocks  = std::ceil(input_height / static_cast<float>(rhs_info.k0));
237 
238     // Block size
239     const unsigned int block_size = rhs_info.n0 * rhs_info.k0;
240 
241     // Output width/height
242     const unsigned int output_width  = block_size * num_vert_blocks * rhs_info.h0;
243     const unsigned int output_height = std::ceil(num_horiz_blocks / static_cast<float>(rhs_info.h0));
244 
245     TensorShape rhs_shape{ a.tensor_shape() };
246     rhs_shape.set(0, output_width);
247     rhs_shape.set(1, output_height);
248 
249     return rhs_shape;
250 }
251 
252 /** Calculate the interleaved shape of an input tensor
253  *
254  * @param[in] a                         Input tensor info
255  * @param[in] mult_interleave4x4_height (Optional) Interleave4x4 height
256  * @param[in] reinterpret_input_as_3d   (Optional)  Set to true if the input need to be interpreted as 3d
257  *
258  * @return the calculated shape
259  */
260 inline TensorShape compute_interleaved_shape(const ITensorInfo &a, int mult_interleave4x4_height = 1, bool reinterpret_input_as_3d = false)
261 {
262     // The interleaved output matrix will have the following shape: [ a_height * W, ceil(a_width / W) ] where W = 4 * mult_interleave4x4_height
263     ARM_COMPUTE_ERROR_ON(mult_interleave4x4_height < 1);
264     const int   interleave_width = 4 * mult_interleave4x4_height;
265     TensorShape shape_interleaved_a{ a.tensor_shape() };
266     shape_interleaved_a.set(0, a.dimension(0) * interleave_width);
267     if(reinterpret_input_as_3d)
268     {
269         const int M      = a.dimension(1) * a.dimension(2);
270         const int height = std::ceil(M / static_cast<float>(interleave_width));
271         shape_interleaved_a.set(1, height);
272 
273         // When the data format is NHWC and the shapes are Nx1x1
274         // the tensor shape num_dimensions is automatically set to 1 instead of 3.
275         // To avoid failures by removing a dimension that doesn't exist
276         // check if the number of dimensions is greater than 2.
277         if(shape_interleaved_a.num_dimensions() > 2)
278         {
279             shape_interleaved_a.remove_dimension(2);
280         }
281     }
282     else
283     {
284         shape_interleaved_a.set(1, std::ceil(a.dimension(1) / static_cast<float>(interleave_width)));
285     }
286 
287     return shape_interleaved_a;
288 }
289 
290 /** Calculate the reshaped shape of the weights to use in depthwise convolution
291  *
292  * @param[in] input Input tensor info
293  * @param[in] info  Depthwise convolution information to be used for reshaping.
294  *
295  * @return the calculated shape
296  */
compute_reshaped_depthwise_weights_shape(const ITensorInfo & input,const DepthwiseConvolutionReshapeInfo & info)297 inline TensorShape compute_reshaped_depthwise_weights_shape(const ITensorInfo &input, const DepthwiseConvolutionReshapeInfo &info)
298 {
299     const auto  data_layout = input.data_layout();
300     TensorShape weights_shape{};
301 
302     const int    width_idx    = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
303     const int    height_idx   = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
304     const int    channel_idx  = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
305     const size_t num_channels = input.dimension(channel_idx);
306     const size_t num_rows     = input.dimension(height_idx);
307     const size_t num_cols     = input.dimension(width_idx);
308 
309     weights_shape.set(0, num_rows * num_cols * info.c0);
310     weights_shape.set(1, DIV_CEIL(num_channels, info.c0));
311     return weights_shape;
312 }
313 
314 /** Calculate the transposed 1xW shape
315  *
316  * @param[in] b Input tensor info
317  *
318  * @return the calculated shape
319  */
compute_transpose1xW_shape(const ITensorInfo & b)320 inline TensorShape compute_transpose1xW_shape(const ITensorInfo &b)
321 {
322     // The transpose1xW output matrix will have the following shape: [ b_height * 16, ceil(b_width / 16.0f) ]
323     TensorShape shape_transposed1xW_b{ b.tensor_shape() };
324     shape_transposed1xW_b.set(0, b.dimension(1) * 16);
325     shape_transposed1xW_b.set(1, std::ceil(b.dimension(0) / 16.f));
326 
327     return shape_transposed1xW_b;
328 }
329 
330 /** Calculate the transposed 1xW width element shape
331  *
332  * @param[in] b                       Input tensor info
333  * @param[in] mult_transpose1xW_width (Optional) Transpose1xW width
334  *
335  * @return the calculated shape
336  */
337 inline TensorShape compute_transpose1xW_with_element_size_shape(const ITensorInfo &b, int mult_transpose1xW_width = 1)
338 {
339     // Note: mult_transpose1xW_width expresses the number of chunks with size 1x(W) we want to store on the same row
340     //       The transpose1xW output matrix will have the following shape:
341     //       [ b_height * W, ceil(b_width / W) ] where W = (16 / element size of the tensor) * mult_transpose1xW_width
342     ARM_COMPUTE_ERROR_ON(mult_transpose1xW_width < 1);
343     TensorShape  shape_transposed1xW_b{ b.tensor_shape() };
344     const size_t transpose_width = (16 / b.element_size()) * mult_transpose1xW_width;
345     shape_transposed1xW_b.set(0, b.dimension(1) * transpose_width);
346     shape_transposed1xW_b.set(1, static_cast<size_t>(std::ceil(b.dimension(0) / static_cast<float>(transpose_width))));
347 
348     return shape_transposed1xW_b;
349 }
350 
351 /** Calculate the reductionA shape used in GEMMLowp
352  *
353  * @param[in] b Input tensor info
354  *
355  * @return the calculated shape
356  */
compute_reductionA_shape(const ITensorInfo & b)357 inline TensorShape compute_reductionA_shape(const ITensorInfo &b)
358 {
359     TensorShape shape_vector_sum_col{ b.tensor_shape() };
360     if(shape_vector_sum_col.num_dimensions() > 1)
361     {
362         shape_vector_sum_col.remove_dimension(1);
363     }
364 
365     return shape_vector_sum_col;
366 }
367 
368 /** Calculate the reductionB shape used in GEMMLowp
369  *
370  * @param[in] a Input tensor info
371  *
372  * @return the calculated shape
373  */
compute_reductionB_shape(const ITensorInfo & a)374 inline TensorShape compute_reductionB_shape(const ITensorInfo &a)
375 {
376     TensorShape shape_vector_sum_row{ a.tensor_shape() };
377     shape_vector_sum_row.set(Window::DimX, a.dimension(1));
378     if(shape_vector_sum_row.num_dimensions() > 1)
379     {
380         shape_vector_sum_row.remove_dimension(1);
381     }
382 
383     return shape_vector_sum_row;
384 }
385 
386 /** Calculate the Col2Im shape
387  *
388  * @param[in] input           Input tensor info
389  * @param[in] convolved_dims  Convolved dimensions
390  * @param[in] batch_size_on_z True if batch size is on z axis
391  * @param[in] num_groups      (Optional)  Number of groups when performing a grouped convolution
392  *
393  * @return the calculated shape
394  */
395 inline TensorShape compute_col2im_shape(const ITensorInfo &input, const Size2D &convolved_dims, bool batch_size_on_z, unsigned int num_groups = 1)
396 {
397     ARM_COMPUTE_ERROR_ON(num_groups == 0);
398     ARM_COMPUTE_ERROR_ON(input.tensor_shape()[1] != (convolved_dims.area()));
399     ARM_COMPUTE_ERROR_ON((num_groups > 1) && input.tensor_shape()[2] != num_groups);
400 
401     const DataLayout data_layout = input.data_layout();
402     const int        width_idx   = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
403     const int        height_idx  = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
404     const int        channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
405 
406     TensorShape col2im_shape{ input.tensor_shape() };
407     // If batches start on 3rd dimension shift dimensions right by 1 to retain upper tensor shape,
408     // as first three will be override by H,W,C data
409     if(batch_size_on_z && num_groups == 1)
410     {
411         col2im_shape.shift_right(1);
412     }
413     col2im_shape.set(width_idx, convolved_dims.width);
414     col2im_shape.set(height_idx, convolved_dims.height);
415     col2im_shape.set(channel_idx, input.tensor_shape()[0] * num_groups);
416 
417     return col2im_shape;
418 }
419 
420 /** Calculate the transposed shape of a tensor
421  *
422  * @param[in] input Input tensor info
423  *
424  * @return the calculated shape
425  */
compute_transposed_shape(const ITensorInfo & input)426 inline TensorShape compute_transposed_shape(const ITensorInfo &input)
427 {
428     TensorShape shape_transposed{ input.tensor_shape() };
429 
430     shape_transposed.set(0, input.dimension(1));
431     shape_transposed.set(1, input.dimension(0));
432 
433     return shape_transposed;
434 }
435 
436 /** Calculate the depthwise convolution output shape of a tensor
437  *
438  * @param[in] input            Input tensor info
439  * @param[in] weights          Weights tensor info
440  * @param[in] conv_info        Padding and stride information to use for the convolution.
441  * @param[in] depth_multiplier Multiplier to apply to the input's depth in order to retrieve the output's depth.
442  * @param[in] dilation         Dilation, in elements, across x and y. Defaults to (1, 1).
443  *
444  * @return the calculated shape
445  */
446 inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, PadStrideInfo conv_info, unsigned int depth_multiplier, const Size2D &dilation = Size2D(1U,
447                                                        1U))
448 {
449     const TensorShape input_shape{ input.tensor_shape() };
450     const TensorShape weights_shape{ weights.tensor_shape() };
451 
452     const DataLayout data_layout = input.data_layout();
453     const int        width_idx   = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
454     const int        height_idx  = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
455     const int        channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
456 
457     const DataLayout weights_data_layout = weights.data_layout();
458     const int        weights_width_idx   = get_data_layout_dimension_index(weights_data_layout, DataLayoutDimension::WIDTH);
459     const int        weights_height_idx  = get_data_layout_dimension_index(weights_data_layout, DataLayoutDimension::HEIGHT);
460 
461     unsigned int output_width  = 0;
462     unsigned int output_height = 0;
463     std::tie(output_width, output_height) = scaled_dimensions(input_shape[width_idx], input_shape[height_idx],
464                                                               weights_shape[weights_width_idx], weights_shape[weights_height_idx],
465                                                               conv_info, dilation);
466 
467     TensorShape output_shape{ input_shape };
468     output_shape.set(width_idx, output_width);
469     output_shape.set(height_idx, output_height);
470     output_shape.set(channel_idx, input_shape[channel_idx] * depth_multiplier);
471 
472     return output_shape;
473 }
474 
475 /** Calculate the upsampled output shape used for deconvolution
476  *
477  * @param[in] input    Input tensor info
478  * @param[in] weights  Weights tensor shape
479  * @param[in] sx       Stride on x axis
480  * @param[in] sy       Stride on y axis
481  * @param[in] out_dims Output shape dimensions
482  * @param[in] padx     Padding on x axis
483  * @param[in] pady     Padding on y axis
484  *
485  * @return the calculated shape
486  */
compute_deconvolution_upsampled_shape(const ITensorInfo & input,const ITensorInfo & weights,unsigned int sx,unsigned int sy,std::pair<unsigned int,unsigned int> & out_dims,uint32_t & padx,uint32_t & pady)487 inline TensorShape compute_deconvolution_upsampled_shape(const ITensorInfo &input, const ITensorInfo &weights, unsigned int sx, unsigned int sy,
488                                                          std::pair<unsigned int, unsigned int> &out_dims, uint32_t &padx, uint32_t &pady)
489 {
490     const DataLayout data_layout = input.data_layout();
491     const size_t     idx_w       = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
492     const size_t     idx_h       = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
493 
494     // Find the upsampled dimensions
495     unsigned int out_x = (input.dimension(idx_w) - 1) * sx + 1;
496     unsigned int out_y = (input.dimension(idx_h) - 1) * sy + 1;
497 
498     // Find the padding needed for the convolution with stride 1 in order to match output shape
499     padx = out_dims.first - (out_x - weights.dimension(idx_w) + 1);
500     pady = out_dims.second - (out_y - weights.dimension(idx_h) + 1);
501     out_x += padx;
502     out_y += pady;
503 
504     TensorShape scale_out_shape(input.tensor_shape());
505     scale_out_shape.set(idx_w, out_x);
506     scale_out_shape.set(idx_h, out_y);
507 
508     return scale_out_shape;
509 }
510 
511 /** Calculate the output shape of the deconvolution layer
512  *
513  * @param[in] out_dims Output x and y shape dimensions
514  * @param[in] input    Input tensor info
515  * @param[in] weights  Weights tensor shape
516  *
517  * @return the calculated shape
518  */
compute_deconvolution_output_shape(const std::pair<unsigned int,unsigned int> & out_dims,const ITensorInfo & input,const ITensorInfo & weights)519 inline TensorShape compute_deconvolution_output_shape(const std::pair<unsigned int, unsigned int> &out_dims, const ITensorInfo &input, const ITensorInfo &weights)
520 {
521     const TensorShape input_shape{ input.tensor_shape() };
522     const TensorShape weights_shape{ weights.tensor_shape() };
523 
524     const DataLayout data_layout = input.data_layout();
525     const int        width_idx   = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
526     const int        height_idx  = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
527     const int        channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
528     const int        batch_idx   = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES);
529 
530     TensorShape out_shape{ input_shape };
531     out_shape.set(width_idx, out_dims.first);
532     out_shape.set(height_idx, out_dims.second);
533     out_shape.set(channel_idx, weights_shape[batch_idx]);
534     return out_shape;
535 }
536 
537 /** Calculate the im2col output shape of a tensor
538  *
539  * @param[in] input           Input tensor info
540  * @param[in] kernel_dims     The kernel dimensions (width and height).
541  * @param[in] conv_info       Contains padding and stride information
542  * @param[in] has_bias        In case biases are provided expands the matrix with 1
543  * @param[in] dilation        Dilation, in elements, across x and y
544  * @param[in] batch_size_on_z True if batch size is on z axis
545  * @param[in] num_groups      (Optional)  Number of groups when performing a grouped convolution
546  *
547  * @return the calculated shape
548  */
549 inline TensorShape compute_im2col_conv_shape(const ITensorInfo *input, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation, bool batch_size_on_z,
550                                              unsigned int num_groups = 1)
551 {
552     // The output shape will be the 3D shape [ out_channels * kernel_area, num_elems_per_out_channel, batches ]                           if batch_size_on_z == true
553     //                       or the 4D shape [ out_channels * kernel_area / num_groups, num_elems_per_out_channel, num_groups, batches ]  if batch_size_on_z == false
554 
555     ARM_COMPUTE_ERROR_ON(num_groups == 0);
556     ARM_COMPUTE_ERROR_ON(num_groups > 1 && input->data_layout() != DataLayout::NCHW);
557     ARM_COMPUTE_ERROR_ON(num_groups > 1 && batch_size_on_z);
558 
559     TensorShape output_shape{ input->tensor_shape() };
560 
561     const DataLayout data_layout = input->data_layout();
562     const int        width_idx   = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
563     const int        height_idx  = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
564     const int        channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
565 
566     std::pair<unsigned int, unsigned int> out_dims = scaled_dimensions(output_shape[width_idx], output_shape[height_idx], kernel_dims.width, kernel_dims.height, conv_info, dilation);
567     output_shape.set(0, (output_shape[channel_idx] / num_groups * kernel_dims.area() + (has_bias ? 1 : 0))); // NOLINT
568     output_shape.set(1, (out_dims.first * out_dims.second));
569     if(batch_size_on_z && output_shape.num_dimensions() >= 3)
570     {
571         output_shape.remove_dimension(2);
572     }
573     else
574     {
575         output_shape.set(2, num_groups);
576     }
577 
578     return output_shape;
579 }
580 
581 /** Calculate the flattened output shape of a tensor
582  *
583  * @param[in] input Input tensor info
584  *
585  * @return the calculated shape
586  */
compute_flatten_shape(const ITensorInfo * input)587 inline TensorShape compute_flatten_shape(const ITensorInfo *input)
588 {
589     // The output shape will be the flatten version of the input (i.e. [ width * height * channels, num_batches, ... ] ). Used for FlattenLayer and FullyConnectedLayer.
590 
591     TensorShape output_shape{ input->tensor_shape() };
592 
593     output_shape.collapse(3);
594 
595     return output_shape;
596 }
597 
598 /** Calculate the softmax output shape of a tensor
599  *
600  * @param[in] input Input tensor info
601  * @param[in] axis  (Optional) Softmax axis
602  *
603  * @return the calculated shape
604  */
605 inline TensorShape compute_softmax_shape(const ITensorInfo *input, size_t axis = 1)
606 {
607     // The output shape will be a 2D version of the input. For instance:
608     // - [x,y,z] and axis 1 will return [x, y*z]
609     // - [x,y,z,w] and axis 2 will return [x*y, w*z]
610     // - [x,y,z,w] and axis 3 will return [x*y*z, w]
611     TensorShape shape2D = input->tensor_shape();
612 
613     if(axis < input->num_dimensions())
614     {
615         // Collapse from axis onward (this changes the shape)
616         shape2D.collapse_from(axis);
617 
618         // Collapse the rest (collapse is inclusive)
619         shape2D.collapse(shape2D.num_dimensions() - 1);
620     }
621     else
622     {
623         // Collapse everything
624         shape2D.collapse(shape2D.num_dimensions());
625     }
626 
627     if(axis == 0)
628     {
629         // If axis is zero the first dim should be one. Since
630         // collapse is an inclusive operation we need to shift
631         shape2D.shift_right(1);
632     }
633 
634     return shape2D;
635 }
636 
637 /** Calculate the winograd filter transform shape
638  *
639  * @param[in] input         Input tensor info
640  * @param[in] winograd_info Winograd information
641  *
642  * @return the calculated shape
643  */
compute_winograd_filter_transform_shape(const ITensorInfo & input,const WinogradInfo & winograd_info)644 inline TensorShape compute_winograd_filter_transform_shape(const ITensorInfo &input, const WinogradInfo &winograd_info)
645 {
646     TensorShape tensor_shape{ input.tensor_shape() };
647 
648     const Size2D kernel_size      = winograd_info.kernel_size;
649     const Size2D output_tile_size = winograd_info.output_tile_size;
650     const Size2D input_tile_size  = Size2D(output_tile_size.width + kernel_size.width - 1, output_tile_size.height + kernel_size.height - 1);
651 
652     tensor_shape.remove_dimension(get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH));
653     tensor_shape.set(Window::DimX, input.dimension(3));
654     tensor_shape.set(Window::DimY, input.dimension(get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::CHANNEL)));
655     tensor_shape.set(Window::DimZ, input_tile_size.area());
656 
657     return tensor_shape;
658 }
659 
660 /** Calculate the winograd input transform shape
661  *
662  * @param[in] input         Input tensor info
663  * @param[in] winograd_info Winograd information
664  *
665  * @return the calculated shape
666  */
compute_winograd_input_transform_shape(const ITensorInfo & input,const WinogradInfo & winograd_info)667 inline TensorShape compute_winograd_input_transform_shape(const ITensorInfo &input, const WinogradInfo &winograd_info)
668 {
669     const PadStrideInfo conv_info        = winograd_info.convolution_info;
670     const Size2D        kernel_size      = winograd_info.kernel_size;
671     const Size2D        output_tile_size = winograd_info.output_tile_size;
672     const Size2D        input_tile_size  = Size2D(output_tile_size.width + kernel_size.width - 1, output_tile_size.height + kernel_size.height - 1);
673 
674     const size_t idx_w = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH);
675     const size_t idx_h = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::HEIGHT);
676     const size_t idx_c = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::CHANNEL);
677 
678     // Compute the number of output tiles along the x and y direction of size "output_tile_size"
679     const Size2D num_tiles = compute_winograd_convolution_tiles(Size2D(input.tensor_shape()[idx_w], input.tensor_shape()[idx_h]),
680                                                                 kernel_size,
681                                                                 output_tile_size,
682                                                                 conv_info);
683 
684     const unsigned int width  = input.tensor_shape()[idx_c];
685     const unsigned int height = num_tiles.area();
686     const unsigned int depth  = input_tile_size.area();
687 
688     TensorShape output_shape{ input.tensor_shape() };
689     output_shape.set(0, width);
690     output_shape.set(1, height);
691     output_shape.set(2, depth);
692 
693     return output_shape;
694 }
695 
696 /** Calculate the winograd output transform shape
697  *
698  * @param[in] input         Input tensor info
699  * @param[in] winograd_info Winograd information
700  *
701  * @return the calculated shape
702  */
compute_winograd_output_transform_shape(const ITensorInfo & input,const WinogradInfo & winograd_info)703 inline TensorShape compute_winograd_output_transform_shape(const ITensorInfo &input, const WinogradInfo &winograd_info)
704 {
705     const PadStrideInfo conv_info        = winograd_info.convolution_info;
706     const Size2D        kernel_size      = winograd_info.kernel_size;
707     const Size2D        input_dimensions = winograd_info.input_dimensions;
708     const DataLayout    data_layout      = winograd_info.output_data_layout;
709 
710     // Compute output shape
711     unsigned int output_width  = 0;
712     unsigned int output_height = 0;
713     std::tie(output_width, output_height) = scaled_dimensions(input_dimensions.width, input_dimensions.height,
714                                                               kernel_size.width, kernel_size.height, conv_info);
715 
716     TensorShape tensor_shape{ input.tensor_shape() };
717 
718     // Output dimension
719     const unsigned int out_w = output_width;
720     const unsigned int out_h = output_height;
721     const unsigned int out_c = input.dimension(0);
722 
723     tensor_shape.set(get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH), out_w);
724     tensor_shape.set(get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT), out_h);
725     tensor_shape.set(get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL), out_c);
726 
727     return tensor_shape;
728 }
729 
730 /** Calculate the deep convolution shape output shape of a tensor
731  *
732  * @param[in] input     Input tensor info
733  * @param[in] weights   Weights tensor info
734  * @param[in] conv_info Contains padding and stride information
735  *
736  * @return the calculated shape
737  */
compute_deep_convolution_shape(const ITensorInfo & input,const ITensorInfo & weights,PadStrideInfo conv_info)738 inline TensorShape compute_deep_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, PadStrideInfo conv_info)
739 {
740     const TensorShape input_shape{ input.tensor_shape() };
741     const TensorShape weights_shape{ weights.tensor_shape() };
742 
743     const size_t idx_width   = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH);
744     const size_t idx_height  = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::HEIGHT);
745     const size_t idx_channel = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::CHANNEL);
746 
747     const unsigned int input_width         = input_shape[idx_width];
748     const unsigned int input_height        = input_shape[idx_height];
749     const unsigned int weights_width       = weights_shape[idx_width];
750     const unsigned int weights_height      = weights_shape[idx_height];
751     const unsigned int weights_out_channel = weights_shape[3];
752     unsigned int       output_width        = 0;
753     unsigned int       output_height       = 0;
754     std::tie(output_width, output_height) = scaled_dimensions(input_width, input_height, weights_width, weights_height, conv_info);
755 
756     TensorShape output_shape{ input_shape };
757     output_shape.set(idx_width, output_width);
758     output_shape.set(idx_height, output_height);
759     output_shape.set(idx_channel, weights_out_channel);
760 
761     return output_shape;
762 }
763 
764 /** Calculate the min/max shape output shape of a tensor
765  *
766  * @param[in] input Input tensor info
767  *
768  * @return the calculated shape
769  */
compute_min_max_shape(const ITensorInfo * input)770 inline TensorShape compute_min_max_shape(const ITensorInfo *input)
771 {
772     TensorShape output_shape{ input->tensor_shape() };
773     output_shape.set(Window::DimX, 2);
774     output_shape.remove_dimension(1);
775     output_shape.remove_dimension(1);
776 
777     return output_shape;
778 }
779 
780 /** Calculate the output pool shape of a tensor
781  *
782  * @param[in] input     Input tensor info
783  * @param[in] pool_info Pooling layer info
784  *
785  * @return the calculated shape
786  */
compute_pool_shape(const ITensorInfo & input,PoolingLayerInfo pool_info)787 inline TensorShape compute_pool_shape(const ITensorInfo &input, PoolingLayerInfo pool_info)
788 {
789     unsigned int pooled_w = 0;
790     unsigned int pooled_h = 0;
791 
792     TensorShape output_shape{ input.tensor_shape() };
793 
794     const bool         is_global_pooling = pool_info.is_global_pooling;
795     const unsigned int idx_width         = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH);
796     const unsigned int idx_height        = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::HEIGHT);
797     const unsigned int pool_size_x       = is_global_pooling ? output_shape[idx_width] : pool_info.pool_size.width;
798     const unsigned int pool_size_y       = is_global_pooling ? output_shape[idx_height] : pool_info.pool_size.height;
799 
800     std::tie(pooled_w, pooled_h) = scaled_dimensions(output_shape[idx_width],
801                                                      output_shape[idx_height],
802                                                      pool_size_x,
803                                                      pool_size_y,
804                                                      pool_info.pad_stride_info);
805 
806     output_shape.set(idx_width, pooled_w);
807     output_shape.set(idx_height, pooled_h);
808 
809     return output_shape;
810 }
811 
812 /** Calculate the output unpool shape of a tensor
813  *
814  * @param[in] input     Input tensor info
815  * @param[in] pool_info Pooling layer info
816  *
817  * @return the calculated shape
818  */
compute_unpool_shape(const ITensorInfo & input,PoolingLayerInfo pool_info)819 inline TensorShape compute_unpool_shape(const ITensorInfo &input, PoolingLayerInfo pool_info)
820 {
821     const unsigned int idx_width   = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH);
822     const unsigned int idx_height  = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::HEIGHT);
823     const TensorShape  input_shape = input.tensor_shape();
824     ARM_COMPUTE_ERROR_ON(input_shape[idx_height] <= 1 || input_shape[idx_width] <= 1);
825     const PadStrideInfo pad_stride_info = pool_info.pad_stride_info;
826     const unsigned int  stride_x        = pad_stride_info.stride().first;
827     const unsigned int  stride_y        = pad_stride_info.stride().second;
828 
829     const int pad_left   = pad_stride_info.pad_left();
830     const int pad_top    = pad_stride_info.pad_top();
831     const int pad_right  = pad_stride_info.pad_right();
832     const int pad_bottom = pad_stride_info.pad_bottom();
833 
834     TensorShape        output_shape = input_shape;
835     const unsigned int out_width    = (input_shape[idx_width] - 1) * stride_x - pad_left - pad_right + pool_info.pool_size.width;
836     const unsigned int out_height   = (input_shape[idx_height] - 1) * stride_y - pad_top - pad_bottom + pool_info.pool_size.height;
837 
838     output_shape.set(idx_width, out_width);
839     output_shape.set(idx_height, out_height);
840     return output_shape;
841 }
842 
843 /** Calculate the output roi align shape of a tensor
844  *
845  * @param[in] input     Input tensor info
846  * @param[in] rois      Rois tensor info
847  * @param[in] pool_info Pooling layer info
848  *
849  * @return the calculated shape
850  */
compute_roi_align_shape(const ITensorInfo & input,const ITensorInfo & rois,ROIPoolingLayerInfo pool_info)851 inline TensorShape compute_roi_align_shape(const ITensorInfo &input, const ITensorInfo &rois, ROIPoolingLayerInfo pool_info)
852 {
853     TensorShape output_shape{ input.tensor_shape() };
854 
855     const unsigned int idx_width  = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH);
856     const unsigned int idx_height = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::HEIGHT);
857 
858     output_shape.set(idx_width, pool_info.pooled_width());
859     output_shape.set(idx_height, pool_info.pooled_height());
860     output_shape.set(3, rois.dimension(1));
861 
862     return output_shape;
863 }
864 
865 /** Calculate the RNN shape of a tensor
866  *
867  * @param[in] input      Input tensor info
868  * @param[in] batch_size Batch size
869  *
870  * @return the calculated shape
871  */
compute_rnn_shape(const ITensorInfo * input,const unsigned int batch_size)872 inline TensorShape compute_rnn_shape(const ITensorInfo *input, const unsigned int batch_size)
873 {
874     TensorShape output_shape{ input->tensor_shape() };
875     output_shape.set(1, batch_size);
876 
877     return output_shape;
878 }
879 
880 /** Calculate the matrix multiplication output shape of two tensors
881  *
882  * @param[in] input0                    First input tensor info
883  * @param[in] input1                    Second input tensor info
884  * @param[in] is_interleaved_transposed True if the input is interleaved transposed
885  * @param[in] reshape_info              GEMM reshape info
886  *
887  * @return the calculated shape
888  */
compute_mm_shape(const ITensorInfo & input0,const ITensorInfo & input1,bool is_interleaved_transposed,const GEMMReshapeInfo & reshape_info)889 inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo &input1, bool is_interleaved_transposed, const GEMMReshapeInfo &reshape_info)
890 {
891     ARM_COMPUTE_ERROR_ON_MSG(input0.num_dimensions() > 4, "The number of dimensions for the matrix A must be <= 4");
892     ARM_COMPUTE_ERROR_ON_MSG(is_interleaved_transposed && reshape_info.reinterpret_input_as_3d(), "The first input tensor cannot be reinterpreted as 3D if is_interleaved_transposed is true");
893 
894     const bool reinterpret_input_as_3d  = reshape_info.reinterpret_input_as_3d();
895     const bool reinterpret_output_as_3d = reshape_info.depth_output_gemm3d() != 0;
896     const int  depth_output_gemm3d      = reinterpret_output_as_3d ? reshape_info.depth_output_gemm3d() : 1;
897     const int  m                        = reshape_info.reinterpret_input_as_3d() ? input0.dimension(1) * input0.dimension(2) : input0.dimension(1);
898 
899     // If the output of GEMM has to be reinterpreted as 3D, the number of input0 rows (M) is obtained collapsing the second and third
900     // dimension of the output tensor
901     const int dim0 = is_interleaved_transposed ? reshape_info.n() : input1.dimension(0);
902     const int dim1 = is_interleaved_transposed ? reshape_info.m() / depth_output_gemm3d : m / depth_output_gemm3d;
903     const int dim2 = reinterpret_input_as_3d ? input0.tensor_shape()[3] : input0.tensor_shape()[2];
904     const int dim3 = reinterpret_input_as_3d ? 1 : input0.tensor_shape()[3];
905 
906     TensorShape output_shape{ input0.tensor_shape() };
907 
908     output_shape.set(0, dim0);
909     output_shape.set(1, dim1);
910     output_shape.set(2, reinterpret_output_as_3d ? depth_output_gemm3d : dim2);
911     output_shape.set(3, reinterpret_output_as_3d ? dim2 : dim3);
912     output_shape.set(4, reinterpret_output_as_3d ? dim3 : 1);
913 
914     return output_shape;
915 }
916 
917 /** Calculate the matrix multiplication output shape of two tensors
918  *
919  * @note Deprecated. Remove when GEMMReshapeInfo is not used anymore by any other kernels
920  *
921  * @param[in] input0    First input tensor info
922  * @param[in] input1    Second input tensor info
923  * @param[in] gemm_info GEMM reshape info
924  *
925  * @return the calculated shape
926  */
compute_mm_shape(const ITensorInfo & input0,const ITensorInfo & input1,const GEMMReshapeInfo & gemm_info)927 inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo &input1, const GEMMReshapeInfo &gemm_info)
928 {
929     ARM_COMPUTE_UNUSED(input1);
930     ARM_COMPUTE_ERROR_ON_MSG(input0.num_dimensions() > 4, "The number of dimensions for the matrix A must be <= 4");
931 
932     const bool reinterpret_input_as_3d  = gemm_info.reinterpret_input_as_3d();
933     const bool reinterpret_output_as_3d = gemm_info.depth_output_gemm3d() != 0;
934     const int  depth_output_gemm3d      = reinterpret_output_as_3d ? gemm_info.depth_output_gemm3d() : 1;
935 
936     TensorShape output_shape{ input0.tensor_shape() };
937 
938     if(!reinterpret_input_as_3d && !reinterpret_output_as_3d)
939     {
940         output_shape.set(0, gemm_info.n());
941         output_shape.set(1, gemm_info.m());
942     }
943     else
944     {
945         // If the output of GEMM has to be reinterpreted as 3D, the number of input0 rows (M) is obtained collapsing the second and third
946         // dimension of the output tensor
947         const int batch_size = reinterpret_input_as_3d ? input0.tensor_shape()[3] : input0.tensor_shape()[2];
948         output_shape.set(0, gemm_info.n());
949         output_shape.set(1, gemm_info.m() / depth_output_gemm3d);
950         output_shape.set(2, reinterpret_output_as_3d ? depth_output_gemm3d : batch_size);
951         output_shape.set(3, reinterpret_output_as_3d ? batch_size : 1);
952     }
953 
954     return output_shape;
955 }
956 
957 /** Calculate the matrix multiplication output shape of two tensors
958  *
959  * @param[in] input0    First input tensor info
960  * @param[in] input1    Second input tensor info
961  * @param[in] gemm_info GEMM kernel info used to retrieve the original dimensions of the input matrices
962  *
963  * @return the calculated shape
964  */
compute_mm_shape(const ITensorInfo & input0,const ITensorInfo & input1,const GEMMKernelInfo & gemm_info)965 inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo &input1, const GEMMKernelInfo &gemm_info)
966 {
967     ARM_COMPUTE_UNUSED(input1);
968     ARM_COMPUTE_ERROR_ON_MSG(input0.num_dimensions() > 4, "The number of dimensions for the matrix A must be <= 4");
969 
970     const bool         reinterpret_input_as_3d  = gemm_info.reinterpret_input_as_3d;
971     const bool         reinterpret_output_as_3d = gemm_info.depth_output_gemm3d != 0;
972     const unsigned int depth_output_gemm3d      = reinterpret_output_as_3d ? gemm_info.depth_output_gemm3d : 1;
973 
974     TensorShape output_shape{ input0.tensor_shape() };
975 
976     if(!reinterpret_input_as_3d && !reinterpret_output_as_3d)
977     {
978         output_shape.set(0, gemm_info.n);
979         output_shape.set(1, gemm_info.m);
980     }
981     else
982     {
983         // If the output of GEMM has to be reinterpreted as 3D, the number of input0 rows (M) is obtained collapsing the second and third
984         // dimension of the output tensor
985         const unsigned int batch_size = reinterpret_input_as_3d ? input0.tensor_shape()[3] : input0.tensor_shape()[2];
986         output_shape.set(0, gemm_info.n);
987         output_shape.set(1, gemm_info.m / depth_output_gemm3d);
988         output_shape.set(2, reinterpret_output_as_3d ? depth_output_gemm3d : batch_size);
989         output_shape.set(3, reinterpret_output_as_3d ? batch_size : 1);
990     }
991 
992     return output_shape;
993 }
994 
995 /** Calculate the matrix multiplication output shape of two tensors
996  *
997  * @param[in] input           Input tensor info
998  * @param[in] gemm_3d_depth   (Optional)  GEMM 3d depth
999  * @param[in] batch_size_on_z (Optional) True if batch size is on z axis
1000  *
1001  * @return the calculated shape
1002  */
1003 inline TensorShape compute_output_stage_shape(const ITensorInfo &input, unsigned int gemm_3d_depth = 1, bool batch_size_on_z = false)
1004 {
1005     ARM_COMPUTE_ERROR_ON(input.data_layout() != DataLayout::NHWC && gemm_3d_depth > 1);
1006 
1007     TensorShape output_shape = input.tensor_shape();
1008     if(gemm_3d_depth > 1)
1009     {
1010         if(batch_size_on_z)
1011         {
1012             output_shape.shift_right(1);
1013         }
1014         output_shape.set(0, input.tensor_shape().x());
1015         output_shape.set(1, input.tensor_shape().y() / gemm_3d_depth);
1016         output_shape.set(2, gemm_3d_depth);
1017     }
1018 
1019     return output_shape;
1020 }
1021 
1022 /** Calculate the strided slice output shape of a tensor
1023  *
1024  * @param[in] input            Input tensor info
1025  * @param[in] starts           The starts of the dimensions of the input tensor to be sliced
1026  * @param[in] ends             The ends of the dimensions of the input tensor to be sliced
1027  * @param[in] strides          The strides of the dimensions of the input tensor to be sliced
1028  * @param[in] begin_mask       If the ith bit of begin_mask is set, starts[i] is ignored and the fullest possible range in that dimension is used instead.
1029  * @param[in] end_mask         If the ith bit of end_mask is set, ends[i] is ignored and the fullest possible range in that dimension is used instead.
1030  * @param[in] shrink_axis_mask If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1
1031  *
1032  * @return the calculated shape
1033  */
compute_strided_slice_shape(const ITensorInfo & input,const Coordinates & starts,const Coordinates & ends,const Coordinates & strides,int32_t begin_mask,int32_t end_mask,int32_t shrink_axis_mask)1034 inline TensorShape compute_strided_slice_shape(const ITensorInfo &input,
1035                                                const Coordinates &starts, const Coordinates &ends, const Coordinates &strides,
1036                                                int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask)
1037 {
1038     using namespace arm_compute::helpers::tensor_transform;
1039     return compute_strided_slice_output_shape(input.tensor_shape(), starts, ends, strides, begin_mask, end_mask, shrink_axis_mask);
1040 }
1041 
1042 /** Calculate the slice output shape of a tensor
1043  *
1044  * @param[in] input_shape Input tensor info
1045  * @param[in] starts      The starts of the dimensions of the input tensor to be sliced
1046  * @param[in] ends        The ends of the dimensions of the input tensor to be sliced
1047  *
1048  * @return the calculated shape
1049  */
compute_slice_shape(const TensorShape & input_shape,const Coordinates & starts,const Coordinates & ends)1050 inline TensorShape compute_slice_shape(const TensorShape &input_shape, const Coordinates &starts, const Coordinates &ends)
1051 {
1052     using namespace arm_compute::helpers::tensor_transform;
1053 
1054     return compute_strided_slice_output_shape(input_shape,
1055                                               starts, ends, BiStrides(),
1056                                               0, construct_slice_end_mask(ends), 0);
1057 }
1058 
1059 /** Calculate the batch to space output shape of a tensor
1060  *
1061  * @param[in] input   Input tensor info
1062  * @param[in] block_x Block shape x value
1063  * @param[in] block_y Block shape y value
1064  *
1065  * @return the calculated shape
1066  */
compute_batch_to_space_shape(const ITensorInfo * input,const int block_x,const int block_y)1067 inline TensorShape compute_batch_to_space_shape(const ITensorInfo *input, const int block_x, const int block_y)
1068 {
1069     ARM_COMPUTE_ERROR_ON(block_x <= 0 || block_y <= 0);
1070 
1071     const DataLayout data_layout = input->data_layout();
1072     const int        idx_width   = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
1073     const int        idx_height  = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
1074     const int        idx_batch   = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES);
1075 
1076     TensorShape output_shape{ input->tensor_shape() };
1077     output_shape.set(idx_width, input->tensor_shape()[idx_width] * block_x);
1078     output_shape.set(idx_height, input->tensor_shape()[idx_height] * block_y);
1079     output_shape.set(idx_batch, input->tensor_shape()[idx_batch] / (block_x * block_y));
1080 
1081     return output_shape;
1082 }
1083 
1084 /** Calculate the depth to space output shape of a tensor
1085  *
1086  * @param[in] input_shape Input tensor shape
1087  * @param[in] data_layout Operation data layout
1088  * @param[in] block       Block shape value
1089  *
1090  * @return the calculated shape
1091  */
compute_depth_to_space_shape(const TensorShape & input_shape,DataLayout data_layout,int block)1092 inline TensorShape compute_depth_to_space_shape(const TensorShape &input_shape, DataLayout data_layout, int block)
1093 {
1094     ARM_COMPUTE_ERROR_ON(block < 2);
1095 
1096     const int idx_width   = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
1097     const int idx_height  = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
1098     const int idx_channel = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
1099 
1100     TensorShape output_shape{ input_shape };
1101     output_shape.set(idx_width, input_shape[idx_width] * block);
1102     output_shape.set(idx_height, input_shape[idx_height] * block);
1103     output_shape.set(idx_channel, input_shape[idx_channel] / (block * block));
1104 
1105     return output_shape;
1106 }
1107 
1108 /** Calculate the split output shape of a tensor
1109  *
1110  * @param[in] input      Input tensor info
1111  * @param[in] axis       Axis on which to split the input
1112  * @param[in] num_splits Number of splits
1113  *
1114  * @return the calculated shape
1115  */
compute_split_shape(const ITensorInfo * input,unsigned int axis,unsigned int num_splits)1116 inline TensorShape compute_split_shape(const ITensorInfo *input, unsigned int axis, unsigned int num_splits)
1117 {
1118     TensorShape empty_shape;
1119     empty_shape.set(0, 0);
1120 
1121     TensorShape out_shape{ input->tensor_shape() };
1122 
1123     // Return empty shape if axis is invalid
1124     if(axis > input->tensor_shape().num_dimensions())
1125     {
1126         return empty_shape;
1127     }
1128 
1129     size_t axis_size = out_shape[axis];
1130 
1131     // Return empty shape if num_split is not valid
1132     if(axis_size % num_splits)
1133     {
1134         return empty_shape;
1135     }
1136 
1137     out_shape[axis] = axis_size / num_splits;
1138     return out_shape;
1139 }
1140 
1141 /** Calculate the space to batch output shape of a tensor
1142  *
1143  * @param[in] input         Input tensor info
1144  * @param[in] block_x       Block shape x value
1145  * @param[in] block_y       Block shape y value
1146  * @param[in] padding_left  Left padding values
1147  * @param[in] padding_right Right padding values
1148  *
1149  * @return the calculated shape
1150  */
compute_space_to_batch_shape(const ITensorInfo * input,const int block_x,const int block_y,const Size2D & padding_left,const Size2D & padding_right)1151 inline TensorShape compute_space_to_batch_shape(const ITensorInfo *input, const int block_x, const int block_y, const Size2D &padding_left, const Size2D &padding_right)
1152 {
1153     TensorShape output_shape{ input->tensor_shape() };
1154 
1155     const DataLayout data_layout = input->data_layout();
1156     const int        idx_width   = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
1157     const int        idx_height  = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
1158     const int        idx_batch   = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES);
1159 
1160     ARM_COMPUTE_ERROR_ON((input->tensor_shape()[idx_width] + padding_left.x() + padding_right.x()) % block_x != 0);
1161     ARM_COMPUTE_ERROR_ON((input->tensor_shape()[idx_height] + padding_left.y() + padding_right.y()) % block_y != 0);
1162 
1163     output_shape.set(idx_width, (input->tensor_shape()[idx_width] + padding_left.x() + padding_right.x()) / block_x);
1164     output_shape.set(idx_height, (input->tensor_shape()[idx_height] + padding_left.y() + padding_right.y()) / block_y);
1165     output_shape.set(idx_batch, input->tensor_shape()[idx_batch] * block_x * block_y);
1166 
1167     return output_shape;
1168 }
1169 
1170 /** Calculate the space to batch output shape of a tensor
1171  *
1172  * @param[in] input       Input tensor info
1173  * @param[in] block_shape Block shape value
1174  *
1175  * @return the calculated shape
1176  */
compute_space_to_depth_shape(const ITensorInfo * input,int32_t block_shape)1177 inline TensorShape compute_space_to_depth_shape(const ITensorInfo *input, int32_t block_shape)
1178 {
1179     TensorShape output_shape{ input->tensor_shape() };
1180 
1181     const DataLayout data_layout = input->data_layout();
1182     const int        idx_width   = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
1183     const int        idx_height  = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
1184     const int        idx_depth   = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
1185 
1186     output_shape.set(idx_width, input->tensor_shape()[idx_width] * block_shape);
1187     output_shape.set(idx_height, input->tensor_shape()[idx_height] * block_shape);
1188     output_shape.set(idx_depth, input->tensor_shape()[idx_depth] / (block_shape * block_shape));
1189 
1190     return output_shape;
1191 }
1192 
1193 /** Calculate the prior box output shape of a tensor
1194  *
1195  * @param[in] input Input tensor info
1196  * @param[in] info  PriorBoxLayer info
1197  *
1198  * @return the calculated shape
1199  */
compute_prior_box_shape(const ITensorInfo & input,const PriorBoxLayerInfo & info)1200 inline TensorShape compute_prior_box_shape(const ITensorInfo &input, const PriorBoxLayerInfo &info)
1201 {
1202     DataLayout   data_layout = input.data_layout();
1203     const size_t idx_w       = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
1204     const size_t idx_h       = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
1205     const int    num_priors  = info.aspect_ratios().size() * info.min_sizes().size() + info.max_sizes().size();
1206 
1207     TensorShape output_shape{};
1208     output_shape.set(0, input.dimension(idx_w) * input.dimension(idx_h) * num_priors * 4);
1209     output_shape.set(1, 2);
1210 
1211     return output_shape;
1212 }
1213 
1214 /** Calculate the padded shape of a tensor
1215  *
1216  * @param[in] input_shape Input tensor shape
1217  * @param[in] padding     Paddings list
1218  *
1219  * @return the calculated shape
1220  */
compute_padded_shape(const TensorShape & input_shape,const PaddingList & padding)1221 inline TensorShape compute_padded_shape(const TensorShape &input_shape, const PaddingList &padding)
1222 {
1223     TensorShape padded_shape = input_shape;
1224     for(size_t dim = 0; dim < padding.size(); ++dim)
1225     {
1226         const auto    &padding_pair   = padding[dim];
1227         const uint32_t shape_on_index = (padded_shape.num_dimensions() <= dim) ? 1 : input_shape[dim];
1228         padded_shape.set(dim, padding_pair.first + shape_on_index + padding_pair.second);
1229     }
1230     return padded_shape;
1231 }
1232 
1233 /** Calculate the tiled shape of a tensor
1234  *
1235  * @param[in] input_shape Input tensor shape
1236  * @param[in] multiples   Paddings list
1237  *
1238  * @return the calculated shape
1239  */
compute_tiled_shape(const TensorShape & input_shape,const Multiples & multiples)1240 inline TensorShape compute_tiled_shape(const TensorShape &input_shape, const Multiples &multiples)
1241 {
1242     TensorShape tiled_shape = input_shape;
1243     for(size_t dim = 0; dim < multiples.size(); ++dim)
1244     {
1245         tiled_shape.set(dim, input_shape[dim] * multiples[dim]);
1246     }
1247     return tiled_shape;
1248 }
1249 
1250 /** Calculate the reduced shape of a tensor given an axis
1251  *
1252  * @param[in] input     Input tensor info
1253  * @param[in] axis      Axis on which to perform reduction
1254  * @param[in] keep_dims (Optional) Whether to keep the dimension after reduction operation. Defaults to true.
1255  *
1256  * @return the calculated shape
1257  */
1258 inline TensorShape compute_reduced_shape(const TensorShape &input, unsigned int axis, bool keep_dims = true)
1259 {
1260     TensorShape output_shape{ input };
1261 
1262     if(!keep_dims)
1263     {
1264         output_shape.remove_dimension(axis);
1265     }
1266     else
1267     {
1268         output_shape.set(axis, 1);
1269     }
1270 
1271     return output_shape;
1272 }
1273 
1274 /** Calculate the upsampled shape of a tensor
1275  *
1276  * @param[in] input Input tensor info
1277  * @param[in] info  Contains stride information (x and y)
1278  *
1279  * @return the calculated shape
1280  */
compute_upsample_shape(const ITensorInfo & input,const Size2D & info)1281 inline TensorShape compute_upsample_shape(const ITensorInfo &input, const Size2D &info)
1282 {
1283     const DataLayout data_layout = input.data_layout();
1284     const int        idx_width   = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
1285     const int        idx_height  = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
1286 
1287     TensorShape        scale_out_shape(input.tensor_shape());
1288     const unsigned int out_x = input.dimension(idx_width) * info.x();
1289     const unsigned int out_y = input.dimension(idx_height) * info.y();
1290     scale_out_shape.set(idx_width, out_x);
1291     scale_out_shape.set(idx_height, out_y);
1292 
1293     return scale_out_shape;
1294 }
1295 
1296 /** Get the tensor shape
1297  *
1298  * @param[in] data Input data
1299  *
1300  * @return the extracted tensor shape
1301  */
1302 template <typename T>
extract_shape(T * data)1303 inline TensorShape extract_shape(T *data)
1304 {
1305     return data->info()->tensor_shape();
1306 }
1307 
extract_shape(ITensorInfo * data)1308 inline TensorShape extract_shape(ITensorInfo *data)
1309 {
1310     return data->tensor_shape();
1311 }
extract_shape(const ITensorInfo * data)1312 inline TensorShape extract_shape(const ITensorInfo *data)
1313 {
1314     return data->tensor_shape();
1315 }
1316 
/** Get the tensor shape from a shape pointer (const overload); returns a copy of the pointee */
inline TensorShape extract_shape(const TensorShape *data)
{
    const TensorShape &shape = *data;
    return shape;
}
1321 
extract_shape(TensorShape * data)1322 inline TensorShape extract_shape(TensorShape *data)
1323 {
1324     return *data;
1325 }
1326 
1327 /** Calculate the unstack shape of a tensor
1328  *
1329  * @param[in] input_shape Input tensor shape
1330  * @param[in] axis        Axis on which to perform the unstack operation
1331  *
1332  * @return the calculated shape
1333  */
calculate_unstack_shape(TensorShape input_shape,unsigned int axis)1334 inline TensorShape calculate_unstack_shape(TensorShape input_shape, unsigned int axis)
1335 {
1336     ARM_COMPUTE_ERROR_ON(axis > input_shape.num_dimensions());
1337     input_shape.remove_dimension(axis);
1338     return input_shape;
1339 }
1340 
1341 /** Calculate the concatenate output shape of the concatenate operation along a single axis
1342  *
1343  * @param[in] input Vector containing the shapes of the inputs
1344  * @param[in] axis  Axis along which to concatenate the input tensors
1345  *
1346  * @return the calculated shape
1347  */
1348 template <typename T>
calculate_concatenate_shape(const std::vector<T * > & input,size_t axis)1349 inline TensorShape calculate_concatenate_shape(const std::vector<T *> &input, size_t axis)
1350 {
1351     TensorShape out_shape = extract_shape(input[0]);
1352 
1353 #if defined(ARM_COMPUTE_ASSERTS_ENABLED)
1354     // All dimensions must match except the axis one
1355     for(unsigned int i = 0; i < MAX_DIMS; ++i)
1356     {
1357         if(i == axis)
1358         {
1359             continue;
1360         }
1361 
1362         for(const auto &tensor : input)
1363         {
1364             ARM_COMPUTE_ERROR_ON(tensor == nullptr);
1365             const TensorShape shape = extract_shape(tensor);
1366             ARM_COMPUTE_ERROR_ON(out_shape[i] != shape[i]);
1367         }
1368     }
1369 #endif // defined(ARM_COMPUTE_ASSERTS_ENABLED)
1370 
1371     // Calculate output shape
1372     size_t new_size = 0;
1373     for(const auto &tensor : input)
1374     {
1375         const TensorShape shape = extract_shape(tensor);
1376         new_size += shape[axis];
1377     }
1378 
1379     out_shape.set(axis, new_size);
1380 
1381     return out_shape;
1382 }
1383 /** Calculate the stack output shape of a tensor
1384  *
1385  * @param[in] a           Input tensor info
1386  * @param[in] axis        Axis on which to perform the stack operation
1387  * @param[in] num_tensors Number of tensors to stack
1388  *
1389  * @return the calculated shape
1390  */
compute_stack_shape(const ITensorInfo & a,unsigned int axis,unsigned int num_tensors)1391 inline TensorShape compute_stack_shape(const ITensorInfo &a, unsigned int axis, unsigned int num_tensors)
1392 {
1393     ARM_COMPUTE_ERROR_ON(axis > a.num_dimensions());
1394     ARM_COMPUTE_ERROR_ON(a.num_dimensions() > 4);
1395 
1396     TensorShape shape_out{ a.tensor_shape() };
1397     shape_out.set(axis, num_tensors);
1398 
1399     unsigned int i_shift = 0;
1400 
1401     for(unsigned int i = 0; i < a.num_dimensions(); ++i)
1402     {
1403         if(i == axis)
1404         {
1405             i_shift++;
1406         }
1407 
1408         shape_out.set(i + i_shift, a.tensor_shape()[i]);
1409     }
1410     return shape_out;
1411 }
1412 
/** Calculate the gather output shape of a tensor
 *
 * The result is a copy of the input shape in which the extent at @p actual_axis is replaced by
 * the first dimension of the indices shape.
 *
 * @param[in] input_shape   Input tensor shape (at most 4 dimensions)
 * @param[in] indices_shape Indices tensor shape (at most 1 dimension)
 * @param[in] actual_axis   Axis to gather along; must be lower than the number of input dimensions
 *
 * @return the calculated shape
 */
inline TensorShape compute_gather_shape(const TensorShape &input_shape, const TensorShape &indices_shape, uint32_t actual_axis)
{
    ARM_COMPUTE_ERROR_ON(indices_shape.num_dimensions() > 1);
    ARM_COMPUTE_ERROR_ON(input_shape.num_dimensions() > 4);
    ARM_COMPUTE_ERROR_ON(actual_axis >= input_shape.num_dimensions());

    // Only the gathered axis changes; every other extent is inherited from the input
    TensorShape gathered_shape(input_shape);
    gathered_shape[actual_axis] = indices_shape[0];

    return gathered_shape;
}
1424 } // namespace shape_calculator
1425 } // namespace misc
1426 } // namespace arm_compute
1427 #endif /* ARM_COMPUTE_MISC_SHAPE_CALCULATOR_H */
1428