/*
 * Copyright (c) 2017-2022 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef ARM_COMPUTE_MISC_SHAPE_CALCULATOR_H
#define ARM_COMPUTE_MISC_SHAPE_CALCULATOR_H

#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensorInfo.h"
#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/runtime/FunctionDescriptors.h"

#include "arm_compute/core/utils/helpers/tensor_transform.h"

#include <cmath>

namespace arm_compute
{
namespace misc
{
namespace shape_calculator
{
/** Calculate the output tensor shape for the reduce mean operation
 *
 * @param[in] input          Input tensor shape
 * @param[in] reduction_axis Reduction axis
 * @param[in] keep_dims      Flag to indicate if dimensions are kept
 *
 * @return the calculated shape
 */
inline TensorShape calculate_reduce_mean_shape(ITensorInfo *input, const Coordinates &reduction_axis, bool keep_dims)
{
    const int   reduction_ops = reduction_axis.num_dimensions();
    Coordinates axis_local    = reduction_axis;
    const int   input_dims    = input->num_dimensions();
    convert_negative_axis(axis_local, input_dims);
    TensorShape out_shape = input->tensor_shape();
    // Configure reshape layer if we want to drop the dimensions
    if(!keep_dims)
    {
        // We have to sort the reduction axis vectors in order for remove_dimension
        // to work properly
        std::sort(axis_local.begin(), axis_local.begin() + reduction_ops);
        for(int i = 0; i < reduction_ops; ++i)
        {
            out_shape.remove_dimension(axis_local[i] - i);
        }
        return out_shape;
    }
    else
    {
        for(int i = 0; i < reduction_ops; ++i)
        {
            out_shape.set(axis_local[i], 1);
        }
        return out_shape;
    }
}
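// Worked example (illustrative comment, not part of the original header): reducing a
// [8, 4, 4, 3] input over axes {1, 2} yields [8, 1, 1, 3] with keep_dims == true and
// [8, 3] with keep_dims == false; the "axis_local[i] - i" offset compensates for the
// indices shifting left after each remove_dimension() call.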
/** Calculate the output tensor shape of a vector input given the convolution dimensions
 *
 * @param[in] input       Input tensor shape
 * @param[in] conv_w      Convolution width
 * @param[in] conv_h      Convolution height
 * @param[in] data_layout Data layout
 *
 * @return the calculated shape
 */
inline TensorShape compute_vector_to_tensor_output_shape(const TensorShape &input, size_t conv_w, size_t conv_h, const DataLayout &data_layout)
{
    const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
    const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
    const size_t idx_c = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);

    TensorShape output_shape(input);
    output_shape.set(idx_w, conv_w);
    output_shape.set(idx_h, conv_h);
    output_shape.set(idx_c, input.x() / (conv_w * conv_h));

    return output_shape;
}

/** Calculate the permuted shape of an input given a permutation vector
 *
 * @param[in] input Input tensor info
 * @param[in] perm  Permutation vector
 *
 * @return the calculated shape
 */
inline TensorShape compute_permutation_output_shape(const ITensorInfo &input, const PermutationVector &perm)
{
    TensorShape output_shape = input.tensor_shape();
    permute(output_shape, perm);
    return output_shape;
}

/** Calculate the output shape of the reorg layer given a stride
 *
 * @param[in] input  Input tensor info
 * @param[in] stride Stride
 *
 * @return the calculated shape
 */
inline TensorShape compute_reorg_output_shape(const ITensorInfo &input, int32_t stride)
{
    const size_t idx_width   = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH);
    const size_t idx_height  = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::HEIGHT);
    const size_t idx_channel = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::CHANNEL);

    ARM_COMPUTE_ERROR_ON(stride <= 0);
    ARM_COMPUTE_ERROR_ON_MSG((input.tensor_shape()[idx_width] % stride != 0), "The width of the input tensor must be a multiple of stride");
    ARM_COMPUTE_ERROR_ON_MSG((input.tensor_shape()[idx_height] % stride != 0), "The height of the input tensor must be a multiple of stride");

    TensorShape output_shape{ input.tensor_shape() };

    output_shape.set(idx_width, output_shape[idx_width] / stride);
    output_shape.set(idx_height, output_shape[idx_height] / stride);
    output_shape.set(idx_channel, output_shape[idx_channel] * stride * stride);

    return output_shape;
}
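// Worked example (illustrative): with stride == 2, an NCHW input [W=6, H=4, C=3, N=1]
// becomes [3, 2, 12, 1]; width and height are divided by the stride while the channel
// count is multiplied by stride * stride, so the element count is preserved.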

/** Calculate the reshaped shape of the weights
 *
 * @param[in] weights    Weights tensor info
 * @param[in] has_bias   (Optional) Set to true if there is bias
 * @param[in] num_groups (Optional) Number of groups
 *
 * @return the calculated shape of the reshaped weights
 */
inline TensorShape compute_weights_reshaped_shape(const ITensorInfo &weights, bool has_bias = false, unsigned int num_groups = 1)
{
    // A number of groups greater than one is only supported for the NCHW data layout, and the number of weights must be a multiple of it.
    ARM_COMPUTE_ERROR_ON(num_groups == 0);
    ARM_COMPUTE_ERROR_ON(weights.data_layout() == DataLayout::NHWC && num_groups > 1);
    ARM_COMPUTE_ERROR_ON((weights.dimension(3) % num_groups) != 0);

    // Calculate output shape
    TensorShape weights_reshaped{ weights.tensor_shape() };
    weights_reshaped.set(3, weights_reshaped[3] / num_groups);

    weights_reshaped.collapse(3);
    const size_t tmp_dim = weights_reshaped[0];
    weights_reshaped.set(0, weights_reshaped[1]);
    weights_reshaped.set(1, tmp_dim + (has_bias ? 1 : 0));
    if(weights.num_dimensions() < 5)
    {
        weights_reshaped.set(2, num_groups);
    }

    return weights_reshaped;
}

/** Calculate the Left Hand Side matrix reshaped shape
 *
 * @param[in] a                       Input tensor info
 * @param[in] lhs_info                Left Hand Side matrix information
 * @param[in] reinterpret_input_as_3d (Optional) Set to true if the input needs to be interpreted as 3D
 *
 * @return the calculated shape
 */
inline TensorShape compute_lhs_reshaped_shape(const ITensorInfo &a, const GEMMLHSMatrixInfo &lhs_info, bool reinterpret_input_as_3d = false)
{
    ARM_COMPUTE_ERROR_ON(lhs_info.m0 == 0);
    ARM_COMPUTE_ERROR_ON(lhs_info.k0 == 0);
    ARM_COMPUTE_ERROR_ON(lhs_info.v0 == 0);

    // Input width/height
    const unsigned int input_width  = a.dimension(0);
    const unsigned int input_height = reinterpret_input_as_3d ? a.dimension(1) * a.dimension(2) : a.dimension(1);

    // Number of horizontal/vertical blocks in the input tensor
    const unsigned int num_horiz_blocks = std::ceil(input_width / static_cast<float>(lhs_info.k0));
    const unsigned int num_vert_blocks  = std::ceil(input_height / static_cast<float>(lhs_info.m0));

    // Block size
    const unsigned int block_size = lhs_info.m0 * lhs_info.k0;

    // Output width/height
    const unsigned int output_width  = block_size * num_horiz_blocks * lhs_info.v0;
    const unsigned int output_height = std::ceil(num_vert_blocks / static_cast<float>(lhs_info.v0));

    TensorShape lhs_shape{ a.tensor_shape() };
    lhs_shape.set(0, output_width);
    lhs_shape.set(1, output_height);

    if((reinterpret_input_as_3d) && (lhs_shape.num_dimensions() > 2))
    {
        // When the data format is NHWC and the shapes are Nx1x1
        // the tensor shape num_dimensions is automatically set to 1 instead of 3.
        // To avoid failures by removing a dimension that doesn't exist
        // check if the number of dimensions is greater than 2.
        lhs_shape.remove_dimension(2);
    }

    return lhs_shape;
}
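// Worked example (illustrative, hypothetical block sizes): for a = [K=8, M=6] with
// lhs_info.m0 = 4, k0 = 2, v0 = 2:
//   num_horiz_blocks = ceil(8 / 2) = 4, num_vert_blocks = ceil(6 / 4) = 2,
//   block_size = 4 * 2 = 8, output = [8 * 4 * 2, ceil(2 / 2)] = [64, 1].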

/** Calculate the Right Hand Side matrix reshaped shape
 *
 * @param[in] a        Input tensor info
 * @param[in] rhs_info Right Hand Side matrix information
 *
 * @return the calculated shape
 */
inline TensorShape compute_rhs_reshaped_shape(const ITensorInfo &a, const GEMMRHSMatrixInfo &rhs_info)
{
    ARM_COMPUTE_ERROR_ON(rhs_info.n0 == 0);
    ARM_COMPUTE_ERROR_ON(rhs_info.k0 == 0);
    ARM_COMPUTE_ERROR_ON(rhs_info.h0 == 0);

    // Input width/height
    const unsigned int input_width  = a.dimension(0);
    const unsigned int input_height = a.dimension(1);

    // Number of horizontal/vertical blocks in the input tensor
    const unsigned int num_horiz_blocks = std::ceil(input_width / static_cast<float>(rhs_info.n0));
    const unsigned int num_vert_blocks  = std::ceil(input_height / static_cast<float>(rhs_info.k0));

    // Block size
    const unsigned int block_size = rhs_info.n0 * rhs_info.k0;

    // Output width/height
    const unsigned int output_width  = block_size * num_vert_blocks * rhs_info.h0;
    const unsigned int output_height = std::ceil(num_horiz_blocks / static_cast<float>(rhs_info.h0));

    TensorShape rhs_shape{ a.tensor_shape() };
    rhs_shape.set(0, output_width);
    rhs_shape.set(1, output_height);

    return rhs_shape;
}
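// Worked example (illustrative, hypothetical block sizes): for a = [N=8, K=6] with
// rhs_info.n0 = 4, k0 = 2, h0 = 2:
//   num_horiz_blocks = ceil(8 / 4) = 2, num_vert_blocks = ceil(6 / 2) = 3,
//   block_size = 4 * 2 = 8, output = [8 * 3 * 2, ceil(2 / 2)] = [48, 1].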

/** Calculate the interleaved shape of an input tensor
 *
 * @param[in] a                         Input tensor info
 * @param[in] mult_interleave4x4_height (Optional) Interleave4x4 height
 * @param[in] reinterpret_input_as_3d   (Optional) Set to true if the input needs to be interpreted as 3D
 *
 * @return the calculated shape
 */
inline TensorShape compute_interleaved_shape(const ITensorInfo &a, int mult_interleave4x4_height = 1, bool reinterpret_input_as_3d = false)
{
    // The interleaved output matrix will have the following shape: [ a_height * W, ceil(a_width / W) ] where W = 4 * mult_interleave4x4_height
    ARM_COMPUTE_ERROR_ON(mult_interleave4x4_height < 1);
    const int   interleave_width = 4 * mult_interleave4x4_height;
    TensorShape shape_interleaved_a{ a.tensor_shape() };
    shape_interleaved_a.set(0, a.dimension(0) * interleave_width);
    if(reinterpret_input_as_3d)
    {
        const int M      = a.dimension(1) * a.dimension(2);
        const int height = std::ceil(M / static_cast<float>(interleave_width));
        shape_interleaved_a.set(1, height);

        // When the data format is NHWC and the shapes are Nx1x1
        // the tensor shape num_dimensions is automatically set to 1 instead of 3.
        // To avoid failures by removing a dimension that doesn't exist
        // check if the number of dimensions is greater than 2.
        if(shape_interleaved_a.num_dimensions() > 2)
        {
            shape_interleaved_a.remove_dimension(2);
        }
    }
    else
    {
        shape_interleaved_a.set(1, std::ceil(a.dimension(1) / static_cast<float>(interleave_width)));
    }

    return shape_interleaved_a;
}
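// Worked example (illustrative): with mult_interleave4x4_height == 1 the interleave
// width W is 4, so a 2D input a = [8, 6] is reshaped to [8 * 4, ceil(6 / 4)] = [32, 2].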

/** Calculate the transposed 1xW shape
 *
 * @param[in] b Input tensor info
 *
 * @return the calculated shape
 */
inline TensorShape compute_transpose1xW_shape(const ITensorInfo &b)
{
    // The transpose1xW output matrix will have the following shape: [ b_height * 16, ceil(b_width / 16.0f) ]
    TensorShape shape_transposed1xW_b{ b.tensor_shape() };
    shape_transposed1xW_b.set(0, b.dimension(1) * 16);
    shape_transposed1xW_b.set(1, std::ceil(b.dimension(0) / 16.f));

    return shape_transposed1xW_b;
}

/** Calculate the transposed 1xW width element shape
 *
 * @param[in] b                       Input tensor info
 * @param[in] mult_transpose1xW_width (Optional) Transpose1xW width
 *
 * @return the calculated shape
 */
inline TensorShape compute_transpose1xW_with_element_size_shape(const ITensorInfo &b, int mult_transpose1xW_width = 1)
{
    // Note: mult_transpose1xW_width expresses the number of chunks with size 1x(W) we want to store on the same row
    //       The transpose1xW output matrix will have the following shape:
    //       [ b_height * W, ceil(b_width / W) ] where W = (16 / element size of the tensor) * mult_transpose1xW_width
    ARM_COMPUTE_ERROR_ON(mult_transpose1xW_width < 1);
    TensorShape  shape_transposed1xW_b{ b.tensor_shape() };
    const size_t transpose_width = (16 / b.element_size()) * mult_transpose1xW_width;
    shape_transposed1xW_b.set(0, b.dimension(1) * transpose_width);
    shape_transposed1xW_b.set(1, static_cast<size_t>(std::ceil(b.dimension(0) / static_cast<float>(transpose_width))));

    return shape_transposed1xW_b;
}
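// Worked example (illustrative): for an FP32 tensor b = [8, 6] (element_size == 4)
// and mult_transpose1xW_width == 1, W = (16 / 4) * 1 = 4, so the output is
// [6 * 4, ceil(8 / 4)] = [24, 2].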

/** Calculate the reductionA shape used in GEMMLowp
 *
 * @param[in] b Input tensor info
 *
 * @return the calculated shape
 */
inline TensorShape compute_reductionA_shape(const ITensorInfo &b)
{
    TensorShape shape_vector_sum_col{ b.tensor_shape() };
    if(shape_vector_sum_col.num_dimensions() > 1)
    {
        shape_vector_sum_col.remove_dimension(1);
    }

    return shape_vector_sum_col;
}

/** Calculate the reductionB shape used in GEMMLowp
 *
 * @param[in] a Input tensor info
 *
 * @return the calculated shape
 */
inline TensorShape compute_reductionB_shape(const ITensorInfo &a)
{
    TensorShape shape_vector_sum_row{ a.tensor_shape() };
    shape_vector_sum_row.set(Window::DimX, a.dimension(1));
    if(shape_vector_sum_row.num_dimensions() > 1)
    {
        shape_vector_sum_row.remove_dimension(1);
    }

    return shape_vector_sum_row;
}

/** Calculate the Col2Im shape
 *
 * @param[in] input           Input tensor info
 * @param[in] convolved_dims  Convolved dimensions
 * @param[in] batch_size_on_z True if batch size is on z axis
 * @param[in] num_groups      (Optional) Number of groups when performing a grouped convolution
 *
 * @return the calculated shape
 */
inline TensorShape compute_col2im_shape(const ITensorInfo &input, const Size2D &convolved_dims, bool batch_size_on_z, unsigned int num_groups = 1)
{
    ARM_COMPUTE_ERROR_ON(num_groups == 0);
    ARM_COMPUTE_ERROR_ON(input.tensor_shape()[1] != (convolved_dims.area()));
    ARM_COMPUTE_ERROR_ON((num_groups > 1) && input.tensor_shape()[2] != num_groups);

    const DataLayout data_layout = input.data_layout();
    const int        width_idx   = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
    const int        height_idx  = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
    const int        channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);

    TensorShape col2im_shape{ input.tensor_shape() };
    // If batches start on the 3rd dimension, shift dimensions right by 1 to retain the upper tensor shape,
    // as the first three will be overridden by H,W,C data
    if(batch_size_on_z && num_groups == 1)
    {
        col2im_shape.shift_right(1);
    }
    col2im_shape.set(width_idx, convolved_dims.width);
    col2im_shape.set(height_idx, convolved_dims.height);
    col2im_shape.set(channel_idx, input.tensor_shape()[0] * num_groups);

    return col2im_shape;
}
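// Worked example (illustrative, assuming NCHW): for a GEMM output of shape
// [num_kernels=16, 6*6=36, batches=8] with convolved_dims = 6x6, batch_size_on_z == true
// and num_groups == 1, the shape is first shifted right once and the first three
// dimensions are then overwritten with W, H and C, giving [6, 6, 16, 8].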

/** Calculate the transposed shape of a tensor
 *
 * @param[in] input Input tensor info
 *
 * @return the calculated shape
 */
inline TensorShape compute_transposed_shape(const ITensorInfo &input)
{
    TensorShape shape_transposed{ input.tensor_shape() };

    shape_transposed.set(0, input.dimension(1));
    shape_transposed.set(1, input.dimension(0));

    return shape_transposed;
}

/** Calculate the depthwise convolution output shape of a tensor
 *
 * @param[in] input   Input tensor info
 * @param[in] weights Weights tensor info
 * @param[in] info    Convolution info
 *
 * @return the calculated shape
 */
inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, const ConvolutionInfo &info)
{
    const TensorShape input_shape{ input.tensor_shape() };
    const TensorShape weights_shape{ weights.tensor_shape() };

    const DataLayout data_layout = input.data_layout();
    const int        width_idx   = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
    const int        height_idx  = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
    const int        channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);

    const DataLayout weights_data_layout = weights.data_layout();
    const int        weights_width_idx   = get_data_layout_dimension_index(weights_data_layout, DataLayoutDimension::WIDTH);
    const int        weights_height_idx  = get_data_layout_dimension_index(weights_data_layout, DataLayoutDimension::HEIGHT);

    unsigned int output_width  = 0;
    unsigned int output_height = 0;
    std::tie(output_width, output_height) = scaled_dimensions(input_shape[width_idx], input_shape[height_idx],
                                                              weights_shape[weights_width_idx], weights_shape[weights_height_idx],
                                                              info.pad_stride_info, info.dilation);

    TensorShape output_shape{ input_shape };
    output_shape.set(width_idx, output_width);
    output_shape.set(height_idx, output_height);
    output_shape.set(channel_idx, input_shape[channel_idx] * info.depth_multiplier);

    return output_shape;
}

/** Calculate the upsampled output shape used for deconvolution
 *
 * @param[in] input    Input tensor info
 * @param[in] weights  Weights tensor shape
 * @param[in] sx       Stride on x axis
 * @param[in] sy       Stride on y axis
 * @param[in] out_dims Output shape dimensions
 * @param[in] padx     Padding on x axis
 * @param[in] pady     Padding on y axis
 *
 * @return the calculated shape
 */
inline TensorShape compute_deconvolution_upsampled_shape(const ITensorInfo &input, const ITensorInfo &weights, unsigned int sx, unsigned int sy,
                                                         std::pair<unsigned int, unsigned int> &out_dims, uint32_t &padx, uint32_t &pady)
{
    const DataLayout data_layout = input.data_layout();
    const size_t     idx_w       = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
    const size_t     idx_h       = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);

    // Find the upsampled dimensions
    unsigned int out_x = (input.dimension(idx_w) - 1) * sx + 1;
    unsigned int out_y = (input.dimension(idx_h) - 1) * sy + 1;

    // Find the padding needed for the convolution with stride 1 in order to match output shape
    padx = out_dims.first - (out_x - weights.dimension(idx_w) + 1);
    pady = out_dims.second - (out_y - weights.dimension(idx_h) + 1);
    out_x += padx;
    out_y += pady;

    TensorShape scale_out_shape(input.tensor_shape());
    scale_out_shape.set(idx_w, out_x);
    scale_out_shape.set(idx_h, out_y);

    return scale_out_shape;
}
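// Worked example (illustrative): for an input width of 4, sx == 2 and a 3x3 kernel,
// out_x = (4 - 1) * 2 + 1 = 7; to match a requested output width of 8,
// padx = 8 - (7 - 3 + 1) = 3, so the upsampled width becomes 7 + 3 = 10.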

/** Calculate the output shape of the deconvolution layer
 *
 * @param[in] out_dims Output x and y shape dimensions
 * @param[in] input    Input tensor info
 * @param[in] weights  Weights tensor shape
 *
 * @return the calculated shape
 */
inline TensorShape compute_deconvolution_output_shape(const std::pair<unsigned int, unsigned int> &out_dims, const ITensorInfo &input, const ITensorInfo &weights)
{
    const TensorShape input_shape{ input.tensor_shape() };
    const TensorShape weights_shape{ weights.tensor_shape() };

    const DataLayout data_layout = input.data_layout();
    const int        width_idx   = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
    const int        height_idx  = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
    const int        channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
    const int        batch_idx   = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES);

    TensorShape out_shape{ input_shape };
    out_shape.set(width_idx, out_dims.first);
    out_shape.set(height_idx, out_dims.second);
    out_shape.set(channel_idx, weights_shape[batch_idx]);
    return out_shape;
}

/** Calculate the im2col output shape of a tensor
 *
 * @param[in] input           Input tensor info
 * @param[in] kernel_dims     The kernel dimensions (width and height).
 * @param[in] conv_info       Contains padding and stride information
 * @param[in] has_bias        If biases are provided, expands the matrix by 1
 * @param[in] dilation        Dilation, in elements, across x and y
 * @param[in] batch_size_on_z True if batch size is on z axis
 * @param[in] num_groups      (Optional) Number of groups when performing a grouped convolution
 *
 * @return the calculated shape
 */
inline TensorShape compute_im2col_conv_shape(const ITensorInfo *input, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation, bool batch_size_on_z,
                                             unsigned int num_groups = 1)
{
    // The output shape will be the 3D shape [ out_channels * kernel_area, num_elems_per_out_channel, batches ]                           if batch_size_on_z == true
    //                       or the 4D shape [ out_channels * kernel_area / num_groups, num_elems_per_out_channel, num_groups, batches ]  if batch_size_on_z == false

    ARM_COMPUTE_ERROR_ON(num_groups == 0);
    ARM_COMPUTE_ERROR_ON(num_groups > 1 && input->data_layout() != DataLayout::NCHW);
    ARM_COMPUTE_ERROR_ON(num_groups > 1 && batch_size_on_z);

    TensorShape output_shape{ input->tensor_shape() };

    const DataLayout data_layout = input->data_layout();
    const int        width_idx   = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
    const int        height_idx  = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
    const int        channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);

    std::pair<unsigned int, unsigned int> out_dims = scaled_dimensions(output_shape[width_idx], output_shape[height_idx], kernel_dims.width, kernel_dims.height, conv_info, dilation);
    output_shape.set(0, (output_shape[channel_idx] / num_groups * kernel_dims.area() + (has_bias ? 1 : 0))); // NOLINT
    output_shape.set(1, (out_dims.first * out_dims.second));
    if(batch_size_on_z && output_shape.num_dimensions() >= 3)
    {
        output_shape.remove_dimension(2);
    }
    else
    {
        output_shape.set(2, num_groups);
    }

    return output_shape;
}
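// Worked example (illustrative): for an NCHW input [W=4, H=4, C=3, N=1] with a 3x3
// kernel, stride 1, no padding, no bias, batch_size_on_z == true and num_groups == 1:
// out_dims = (2, 2), so the output is [3 * 9 + 0, 2 * 2, 1] = [27, 4, 1].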

/** Calculate the flattened output shape of a tensor
 *
 * @param[in] input Input tensor info
 *
 * @return the calculated shape
 */
inline TensorShape compute_flatten_shape(const ITensorInfo *input)
{
    // The output shape will be the flattened version of the input (i.e. [ width * height * channels, num_batches, ... ] ). Used for FlattenLayer and FullyConnectedLayer.

    TensorShape output_shape{ input->tensor_shape() };

    output_shape.collapse(3);

    return output_shape;
}

/** Calculate the softmax output shape of a tensor
 *
 * @param[in] input Input tensor info
 * @param[in] axis  (Optional) Softmax axis
 *
 * @return the calculated shape
 */
inline TensorShape compute_softmax_shape(const ITensorInfo *input, size_t axis = 1)
{
    // The output shape will be a 2D version of the input. For instance:
    // - [x,y,z] and axis 1 will return [x, y*z]
    // - [x,y,z,w] and axis 2 will return [x*y, w*z]
    // - [x,y,z,w] and axis 3 will return [x*y*z, w]
    TensorShape shape2D = input->tensor_shape();

    if(axis < input->num_dimensions())
    {
        // Collapse from axis onward (this changes the shape)
        shape2D.collapse_from(axis);

        // Collapse the rest (collapse is inclusive)
        shape2D.collapse(shape2D.num_dimensions() - 1);
    }
    else
    {
        // Collapse everything
        shape2D.collapse(shape2D.num_dimensions());
    }

    if(axis == 0)
    {
        // If axis is zero the first dim should be one. Since
        // collapse is an inclusive operation we need to shift
        shape2D.shift_right(1);
    }

    return shape2D;
}
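// Worked example (illustrative): for axis == 0 the whole shape is collapsed and then
// shifted right, e.g. [x, y, z] -> [1, x*y*z], so the leading dimension is 1 as
// expected for a softmax computed over every element.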

/** Calculate the winograd filter transform shape
 *
 * @param[in] input         Input tensor info
 * @param[in] winograd_info Winograd information
 *
 * @return the calculated shape
 */
inline TensorShape compute_winograd_filter_transform_shape(const ITensorInfo &input, const WinogradInfo &winograd_info)
{
    TensorShape tensor_shape{ input.tensor_shape() };

    const Size2D kernel_size      = winograd_info.kernel_size;
    const Size2D output_tile_size = winograd_info.output_tile_size;
    const Size2D input_tile_size  = Size2D(output_tile_size.width + kernel_size.width - 1, output_tile_size.height + kernel_size.height - 1);

    tensor_shape.remove_dimension(get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH));
    tensor_shape.set(Window::DimX, input.dimension(3));
    tensor_shape.set(Window::DimY, input.dimension(get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::CHANNEL)));
    tensor_shape.set(Window::DimZ, input_tile_size.area());

    return tensor_shape;
}

/** Calculate the winograd input transform shape
 *
 * @param[in] input         Input tensor info
 * @param[in] winograd_info Winograd information
 *
 * @return the calculated shape
 */
inline TensorShape compute_winograd_input_transform_shape(const ITensorInfo &input, const WinogradInfo &winograd_info)
{
    const PadStrideInfo conv_info        = winograd_info.convolution_info;
    const Size2D        kernel_size      = winograd_info.kernel_size;
    const Size2D        output_tile_size = winograd_info.output_tile_size;
    const Size2D        input_tile_size  = Size2D(output_tile_size.width + kernel_size.width - 1, output_tile_size.height + kernel_size.height - 1);

    const size_t idx_w = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH);
    const size_t idx_h = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::HEIGHT);
    const size_t idx_c = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::CHANNEL);

    // Compute the number of output tiles along the x and y direction of size "output_tile_size"
    const Size2D num_tiles = compute_winograd_convolution_tiles(Size2D(input.tensor_shape()[idx_w], input.tensor_shape()[idx_h]),
                                                                kernel_size,
                                                                output_tile_size,
                                                                conv_info);

    const unsigned int width  = input.tensor_shape()[idx_c];
    const unsigned int height = num_tiles.area();
    const unsigned int depth  = input_tile_size.area();

    TensorShape output_shape{ input.tensor_shape() };
    output_shape.set(0, width);
    output_shape.set(1, height);
    output_shape.set(2, depth);

    return output_shape;
}
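// Worked example (illustrative): for an NCHW input [W=8, H=8, C=3], a 3x3 kernel, a
// 2x2 output tile and no padding, the input tile is 4x4 and the convolution produces
// 3x3 = 9 tiles, so the transform output is [C, num_tiles, tile_area] = [3, 9, 16].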

/** Calculate the winograd output transform shape
 *
 * @param[in] input         Input tensor info
 * @param[in] winograd_info Winograd information
 *
 * @return the calculated shape
 */
inline TensorShape compute_winograd_output_transform_shape(const ITensorInfo &input, const WinogradInfo &winograd_info)
{
    const PadStrideInfo conv_info        = winograd_info.convolution_info;
    const Size2D        kernel_size      = winograd_info.kernel_size;
    const Size2D        input_dimensions = winograd_info.input_dimensions;
    const DataLayout    data_layout      = winograd_info.output_data_layout;

    // Compute output shape
    unsigned int output_width  = 0;
    unsigned int output_height = 0;
    std::tie(output_width, output_height) = scaled_dimensions(input_dimensions.width, input_dimensions.height,
                                                              kernel_size.width, kernel_size.height, conv_info);

    TensorShape tensor_shape{ input.tensor_shape() };

    // Output dimension
    const unsigned int out_w = output_width;
    const unsigned int out_h = output_height;
    const unsigned int out_c = input.dimension(0);

    tensor_shape.set(get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH), out_w);
    tensor_shape.set(get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT), out_h);
    tensor_shape.set(get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL), out_c);

    return tensor_shape;
}

/** Calculate the deep convolution output shape of a tensor
 *
 * @param[in] input_shape       Input tensor shape
 * @param[in] input_data_layout Input data layout
 * @param[in] weights_shape     Weights tensor shape
 * @param[in] conv_info         Contains padding and stride information
 *
 * @return the calculated shape
 */
inline TensorShape compute_deep_convolution_shape(const TensorShape &input_shape, DataLayout input_data_layout, const TensorShape &weights_shape, const PadStrideInfo &conv_info)
{
    const size_t idx_width   = get_data_layout_dimension_index(input_data_layout, DataLayoutDimension::WIDTH);
    const size_t idx_height  = get_data_layout_dimension_index(input_data_layout, DataLayoutDimension::HEIGHT);
    const size_t idx_channel = get_data_layout_dimension_index(input_data_layout, DataLayoutDimension::CHANNEL);

    const unsigned int input_width         = input_shape[idx_width];
    const unsigned int input_height        = input_shape[idx_height];
    const unsigned int weights_width       = weights_shape[idx_width];
    const unsigned int weights_height      = weights_shape[idx_height];
    const unsigned int weights_out_channel = weights_shape[3];
    unsigned int       output_width        = 0;
    unsigned int       output_height       = 0;
    std::tie(output_width, output_height) = scaled_dimensions(input_width, input_height, weights_width, weights_height, conv_info);

    TensorShape output_shape{ input_shape };
    output_shape.set(idx_width, output_width);
    output_shape.set(idx_height, output_height);
    output_shape.set(idx_channel, weights_out_channel);

    return output_shape;
}
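// Worked example (illustrative): for an NCHW input [W=8, H=8, C=3, N=1], weights
// [3, 3, 3, 16], stride 1 and no padding, scaled_dimensions() gives 6x6, so the
// output shape is [6, 6, 16, 1].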

/** Calculate the deep convolution output shape of a tensor
 *
 * @param[in] input     Input tensor info
 * @param[in] weights   Weights tensor info
 * @param[in] conv_info Contains padding and stride information
 *
 * @return the calculated shape
 */
inline TensorShape compute_deep_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, const PadStrideInfo &conv_info)
{
    return compute_deep_convolution_shape(input.tensor_shape(), input.data_layout(), weights.tensor_shape(), conv_info);
}

/** Calculate the indirect buffer output shape used by the indirect convolution function
 *
 * @param[in] input_shape       Input tensor shape
 * @param[in] input_data_layout Input data layout
 * @param[in] weights_shape     Weights tensor shape
 * @param[in] conv_info         Contains padding and stride information
 * @param[in] desc              Contains the direct/indirect convolution compute arguments, such as the tiling dimensions
 *
 * @return the calculated shape
 */
inline TensorShape compute_indirect_buffer_shape(const TensorShape &input_shape, DataLayout input_data_layout, const TensorShape &weights_shape, const PadStrideInfo &conv_info,
                                                 const DirectConvComputeKernelInfo &desc)
{
    ARM_COMPUTE_ERROR_ON_MSG(input_data_layout != DataLayout::NHWC, "The data layout can only be NHWC");
    ARM_COMPUTE_ERROR_ON_MSG(desc.m0 <= 0 || desc.m0 > 8, "M0 can only be greater than 0 and less than or equal to 8");

    const unsigned int m0 = desc.m0;
    const unsigned int kw = weights_shape[1];
    const unsigned int kh = weights_shape[2];

    TensorShape output_conv2d_shape = compute_deep_convolution_shape(input_shape, input_data_layout, weights_shape, conv_info);

    const unsigned int output_w = m0 * kw * kh;
    const unsigned int output_h = DIV_CEIL(output_conv2d_shape[1] * output_conv2d_shape[2], m0);
    const unsigned int output_b = output_conv2d_shape[3];

    return TensorShape(output_w, output_h, output_b);
}

/** Calculate the min/max shape output shape of a tensor
 *
 * @param[in] input Input tensor info
 *
 * @return the calculated shape
 */
inline TensorShape compute_min_max_shape(const ITensorInfo *input)
{
    TensorShape output_shape{ input->tensor_shape() };
    output_shape.set(Window::DimX, 2);
    output_shape.remove_dimension(1);
    output_shape.remove_dimension(1);

    return output_shape;
}

/** Calculate the output pool shape of a tensor
 *
 * @param[in] input     Input tensor info
 * @param[in] pool_info Pooling layer info
 *
 * @return the calculated shape
 */
inline TensorShape compute_pool_shape(const ITensorInfo &input, PoolingLayerInfo pool_info)
{
    int pooled_w = 0;
    int pooled_h = 0;

    TensorShape output_shape{ input.tensor_shape() };

    const bool is_global_pooling = pool_info.is_global_pooling;
    const int  idx_width         = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH);
    const int  idx_height        = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::HEIGHT);
    const int  input_width       = input.tensor_shape()[idx_width];
    const int  input_height      = input.tensor_shape()[idx_height];
    const int  pool_size_x       = is_global_pooling ? output_shape[idx_width] : pool_info.pool_size.width;
    const int  pool_size_y       = is_global_pooling ? output_shape[idx_height] : pool_info.pool_size.height;

    std::tie(pooled_w, pooled_h) = scaled_dimensions_signed(input_width, input_height,
                                                            pool_size_x, pool_size_y,
                                                            pool_info.pad_stride_info);

    ARM_COMPUTE_ERROR_ON_MSG((pooled_w < 1 || pooled_h < 1), "Calculated output dimension size is invalid");

    output_shape.set(idx_width, static_cast<size_t>(pooled_w));
    output_shape.set(idx_height, static_cast<size_t>(pooled_h));

    return output_shape;
}

/** Calculate the output unpool shape of a tensor
 *
 * @param[in] input     Input tensor info
 * @param[in] pool_info Pooling layer info
 *
 * @return the calculated shape
 */
inline TensorShape compute_unpool_shape(const ITensorInfo &input, PoolingLayerInfo pool_info)
{
    const unsigned int idx_width   = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH);
    const unsigned int idx_height  = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::HEIGHT);
    const TensorShape  input_shape = input.tensor_shape();
    ARM_COMPUTE_ERROR_ON(input_shape[idx_height] <= 1 || input_shape[idx_width] <= 1);
    const PadStrideInfo pad_stride_info = pool_info.pad_stride_info;
    const unsigned int  stride_x        = pad_stride_info.stride().first;
    const unsigned int  stride_y        = pad_stride_info.stride().second;

    const int pad_left   = pad_stride_info.pad_left();
    const int pad_top    = pad_stride_info.pad_top();
    const int pad_right  = pad_stride_info.pad_right();
    const int pad_bottom = pad_stride_info.pad_bottom();

    TensorShape        output_shape = input_shape;
    const unsigned int out_width    = (input_shape[idx_width] - 1) * stride_x - pad_left - pad_right + pool_info.pool_size.width;
    const unsigned int out_height   = (input_shape[idx_height] - 1) * stride_y - pad_top - pad_bottom + pool_info.pool_size.height;

    output_shape.set(idx_width, out_width);
    output_shape.set(idx_height, out_height);
    return output_shape;
}

/** Calculate the output roi align shape of a tensor
 *
 * @param[in] input     Input tensor info
 * @param[in] rois      Rois tensor info
 * @param[in] pool_info Pooling layer info
 *
 * @return the calculated shape
 */
inline TensorShape compute_roi_align_shape(const ITensorInfo &input, const ITensorInfo &rois, ROIPoolingLayerInfo pool_info)
{
    TensorShape output_shape{ input.tensor_shape() };

    const unsigned int idx_width  = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH);
    const unsigned int idx_height = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::HEIGHT);

    output_shape.set(idx_width, pool_info.pooled_width());
    output_shape.set(idx_height, pool_info.pooled_height());
    output_shape.set(3, rois.dimension(1));

    return output_shape;
}

/** Calculate the RNN shape of a tensor
 *
 * @param[in] input      Input tensor info
 * @param[in] batch_size Batch size
 *
 * @return the calculated shape
 */
inline TensorShape compute_rnn_shape(const ITensorInfo *input, const unsigned int batch_size)
{
    TensorShape output_shape{ input->tensor_shape() };
    output_shape.set(1, batch_size);

    return output_shape;
}

/** Calculate the matrix multiplication output shape of two tensors
 *
 * @param[in] input0                    First input tensor info
 * @param[in] input1                    Second input tensor info
 * @param[in] is_interleaved_transposed True if the input is interleaved transposed
 * @param[in] reshape_info              GEMM reshape info
 *
 * @return the calculated shape
 */
inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo &input1, bool is_interleaved_transposed, const GEMMReshapeInfo &reshape_info)
{
    ARM_COMPUTE_ERROR_ON_MSG(input0.num_dimensions() > 4, "The number of dimensions for the matrix A must be <= 4");
    ARM_COMPUTE_ERROR_ON_MSG(is_interleaved_transposed && reshape_info.reinterpret_input_as_3d(), "The first input tensor cannot be reinterpreted as 3D if is_interleaved_transposed is true");

    const bool reinterpret_input_as_3d  = reshape_info.reinterpret_input_as_3d();
    const bool reinterpret_output_as_3d = reshape_info.depth_output_gemm3d() != 0;
    const int  depth_output_gemm3d      = reinterpret_output_as_3d ? reshape_info.depth_output_gemm3d() : 1;
    const int  m                        = reshape_info.reinterpret_input_as_3d() ? input0.dimension(1) * input0.dimension(2) : input0.dimension(1);

    // If the output of GEMM has to be reinterpreted as 3D, the number of input0 rows (M) is obtained collapsing the second and third
    // dimension of the output tensor
    const int dim0 = is_interleaved_transposed ? reshape_info.n() : input1.dimension(0);
    const int dim1 = is_interleaved_transposed ? reshape_info.m() / depth_output_gemm3d : m / depth_output_gemm3d;
    const int dim2 = reinterpret_input_as_3d ? input0.tensor_shape()[3] : input0.tensor_shape()[2];
    const int dim3 = reinterpret_input_as_3d ? 1 : input0.tensor_shape()[3];

    TensorShape output_shape{ input0.tensor_shape() };

    output_shape.set(0, dim0);
    output_shape.set(1, dim1);
    output_shape.set(2, reinterpret_output_as_3d ? depth_output_gemm3d : dim2);
    output_shape.set(3, reinterpret_output_as_3d ? dim2 : dim3);
    output_shape.set(4, reinterpret_output_as_3d ? dim3 : 1);

    return output_shape;
}

/** Calculate the matrix multiplication output shape of two tensors
 *
 * @param[in] input0    First input tensor info
 * @param[in] input1    Second input tensor info
 * @param[in] gemm_info GEMM reshape info
 *
 * @return the calculated shape
 */
inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo &input1, const GEMMReshapeInfo &gemm_info)
{
    ARM_COMPUTE_UNUSED(input1);
    ARM_COMPUTE_ERROR_ON_MSG(input0.num_dimensions() > 4, "The number of dimensions for the matrix A must be <= 4");

    const bool reinterpret_input_as_3d  = gemm_info.reinterpret_input_as_3d();
    const bool reinterpret_output_as_3d = gemm_info.depth_output_gemm3d() != 0;
    const int  depth_output_gemm3d      = reinterpret_output_as_3d ? gemm_info.depth_output_gemm3d() : 1;

    TensorShape output_shape{ input0.tensor_shape() };

    if(!reinterpret_input_as_3d && !reinterpret_output_as_3d)
    {
        output_shape.set(0, gemm_info.n());
        output_shape.set(1, gemm_info.m());
    }
    else
    {
        // If the output of GEMM has to be reinterpreted as 3D, the number of input0 rows (M) is obtained collapsing the second and third
        // dimension of the output tensor
        const int batch_size = reinterpret_input_as_3d ? input0.tensor_shape()[3] : input0.tensor_shape()[2];
        output_shape.set(0, gemm_info.n());
        output_shape.set(1, gemm_info.m() / depth_output_gemm3d);
        output_shape.set(2, reinterpret_output_as_3d ? depth_output_gemm3d : batch_size);
        output_shape.set(3, reinterpret_output_as_3d ? batch_size : 1);
    }

    return output_shape;
}
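// Worked example (illustrative): with gemm_info.m() == 16, n() == 8 and a batch size
// of 2, the plain 2D case gives [8, 16, 2]; with depth_output_gemm3d() == 4 the output
// is reinterpreted as 3D and M is split, giving [8, 16 / 4, 4, 2] = [8, 4, 4, 2].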

/** Calculate the matrix multiplication output shape of two tensors
 *
 * @param[in] input0    First input tensor info
 * @param[in] input1    Second input tensor info
 * @param[in] gemm_info GEMM kernel info used to retrieve the original dimensions of the input matrices
 *
 * @return the calculated shape
 */
inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo &input1, const GEMMKernelInfo &gemm_info)
{
    ARM_COMPUTE_UNUSED(input1);
    ARM_COMPUTE_ERROR_ON_MSG(input0.num_dimensions() > 4, "The number of dimensions for the matrix A must be <= 4");

    const bool         reinterpret_input_as_3d  = gemm_info.reinterpret_input_as_3d;
    const bool         reinterpret_output_as_3d = gemm_info.depth_output_gemm3d != 0;
    const unsigned int depth_output_gemm3d      = reinterpret_output_as_3d ? gemm_info.depth_output_gemm3d : 1;

    TensorShape output_shape{ input0.tensor_shape() };

    if(!reinterpret_input_as_3d && !reinterpret_output_as_3d)
    {
        output_shape.set(0, gemm_info.n);
        output_shape.set(1, gemm_info.m);
    }
    else
    {
        // If the output of GEMM has to be reinterpreted as 3D, the number of input0 rows (M) is obtained collapsing the second and third
        // dimension of the output tensor
        const unsigned int batch_size = reinterpret_input_as_3d ? input0.tensor_shape()[3] : input0.tensor_shape()[2];
        output_shape.set(0, gemm_info.n);
        output_shape.set(1, gemm_info.m / depth_output_gemm3d);
        output_shape.set(2, reinterpret_output_as_3d ? depth_output_gemm3d : batch_size);
        output_shape.set(3, reinterpret_output_as_3d ? batch_size : 1);
    }

    return output_shape;
}

/** Calculate the output stage shape of a tensor
 *
 * @param[in] input           Input tensor info
 * @param[in] gemm_3d_depth   (Optional) GEMM 3d depth
 * @param[in] batch_size_on_z (Optional) True if batch size is on z axis
 *
 * @return the calculated shape
 */
inline TensorShape compute_output_stage_shape(const ITensorInfo &input, unsigned int gemm_3d_depth = 1, bool batch_size_on_z = false)
{
    ARM_COMPUTE_ERROR_ON(input.data_layout() != DataLayout::NHWC && gemm_3d_depth > 1);

    TensorShape output_shape = input.tensor_shape();
    if(gemm_3d_depth > 1)
    {
        if(batch_size_on_z)
        {
            output_shape.shift_right(1);
        }
        output_shape.set(0, input.tensor_shape().x());
        output_shape.set(1, input.tensor_shape().y() / gemm_3d_depth);
        output_shape.set(2, gemm_3d_depth);
    }

    return output_shape;
}

/** Calculate the strided slice output shape of a tensor
 *
 * @param[in] input            Input tensor info
 * @param[in] starts           The starts of the dimensions of the input tensor to be sliced
 * @param[in] ends             The ends of the dimensions of the input tensor to be sliced
 * @param[in] strides          The strides of the dimensions of the input tensor to be sliced
 * @param[in] begin_mask       If the ith bit of begin_mask is set, starts[i] is ignored and the fullest possible range in that dimension is used instead.
 * @param[in] end_mask         If the ith bit of end_mask is set, ends[i] is ignored and the fullest possible range in that dimension is used instead.
 * @param[in] shrink_axis_mask If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1
 *
 * @return the calculated shape
 */
inline TensorShape compute_strided_slice_shape(const ITensorInfo &input,
                                               const Coordinates &starts, const Coordinates &ends, const Coordinates &strides,
                                               int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask)
{
    using namespace arm_compute::helpers::tensor_transform;
    return compute_strided_slice_output_shape(input.tensor_shape(), starts, ends, strides, begin_mask, end_mask, shrink_axis_mask);
}

/** Calculate the slice output shape of a tensor
 *
 * @param[in] input_shape Input tensor shape
 * @param[in] starts      The starts of the dimensions of the input tensor to be sliced
 * @param[in] ends        The ends of the dimensions of the input tensor to be sliced
 *
 * @return the calculated shape
 */
inline TensorShape compute_slice_shape(const TensorShape &input_shape, const Coordinates &starts, const Coordinates &ends)
{
    using namespace arm_compute::helpers::tensor_transform;

    return compute_strided_slice_output_shape(input_shape,
                                              starts, ends, BiStrides(),
                                              0, construct_slice_end_mask(ends), 0);
}

/** Calculate the batch to space output shape of a tensor
 *
 * @param[in] input   Input tensor info
 * @param[in] block_x Block shape x value
 * @param[in] block_y Block shape y value
 *
 * @return the calculated shape
 */
inline TensorShape compute_batch_to_space_shape(const ITensorInfo *input, const int block_x, const int block_y)
{
    ARM_COMPUTE_ERROR_ON(block_x <= 0 || block_y <= 0);

    const DataLayout data_layout = input->data_layout();
    const int        idx_width   = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
    const int        idx_height  = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
    const int        idx_batch   = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES);

    TensorShape output_shape{ input->tensor_shape() };
    output_shape.set(idx_width, input->tensor_shape()[idx_width] * block_x);
    output_shape.set(idx_height, input->tensor_shape()[idx_height] * block_y);
    output_shape.set(idx_batch, input->tensor_shape()[idx_batch] / (block_x * block_y));

    return output_shape;
}

/** Calculate the depth to space output shape of a tensor
 *
 * @param[in] input_shape Input tensor shape
 * @param[in] data_layout Operation data layout
 * @param[in] block       Block shape value
 *
 * @return the calculated shape
 */
inline TensorShape compute_depth_to_space_shape(const TensorShape &input_shape, DataLayout data_layout, int block)
{
    ARM_COMPUTE_ERROR_ON(block < 2);

    const int idx_width   = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
    const int idx_height  = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
    const int idx_channel = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);

    TensorShape output_shape{ input_shape };
    output_shape.set(idx_width, input_shape[idx_width] * block);
    output_shape.set(idx_height, input_shape[idx_height] * block);
    output_shape.set(idx_channel, input_shape[idx_channel] / (block * block));

    return output_shape;
}

/** Calculate the split output shape of a tensor
 *
 * @param[in] input      Input tensor info
 * @param[in] axis       Axis on which to split the input
 * @param[in] num_splits Number of splits
 *
 * @return the calculated shape
 */
inline TensorShape compute_split_shape(const ITensorInfo *input, unsigned int axis, unsigned int num_splits)
{
    TensorShape empty_shape;
    empty_shape.set(0, 0);

    TensorShape out_shape{ input->tensor_shape() };

    // Return empty shape if axis is invalid
    if(axis > input->tensor_shape().num_dimensions())
    {
        return empty_shape;
    }

    size_t axis_size = out_shape[axis];

    // Return empty shape if num_splits is not valid
    if(axis_size % num_splits)
    {
        return empty_shape;
    }

    out_shape[axis] = axis_size / num_splits;
    return out_shape;
}
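// Worked example (illustrative): splitting an [8, 6] input along axis 1 with
// num_splits == 3 gives [8, 2]; if the axis size were not divisible by num_splits,
// the function would return the empty shape rather than asserting.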

/** Calculate the space to batch output shape of a tensor
 *
 * @param[in] input         Input tensor info
 * @param[in] block_x       Block shape x value
 * @param[in] block_y       Block shape y value
 * @param[in] padding_left  Left padding values
 * @param[in] padding_right Right padding values
 *
 * @return the calculated shape
 */
inline TensorShape compute_space_to_batch_shape(const ITensorInfo *input, const int block_x, const int block_y, const Size2D &padding_left, const Size2D &padding_right)
{
    TensorShape output_shape{ input->tensor_shape() };

    const DataLayout data_layout = input->data_layout();
    const int        idx_width   = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
    const int        idx_height  = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
    const int        idx_batch   = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES);

    ARM_COMPUTE_ERROR_ON((input->tensor_shape()[idx_width] + padding_left.x() + padding_right.x()) % block_x != 0);
    ARM_COMPUTE_ERROR_ON((input->tensor_shape()[idx_height] + padding_left.y() + padding_right.y()) % block_y != 0);

    output_shape.set(idx_width, (input->tensor_shape()[idx_width] + padding_left.x() + padding_right.x()) / block_x);
    output_shape.set(idx_height, (input->tensor_shape()[idx_height] + padding_left.y() + padding_right.y()) / block_y);
    output_shape.set(idx_batch, input->tensor_shape()[idx_batch] * block_x * block_y);

    return output_shape;
}
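
/* Worked example (editor's sketch; values are hypothetical). This is the
 * inverse of compute_batch_to_space_shape: an NHWC input of
 * (N=2, H=4, W=4, C=3) with block_x = block_y = 2 and zero padding gives
 *
 *   width:  (4 + 0 + 0) / 2 = 2
 *   height: (4 + 0 + 0) / 2 = 2
 *   batch:  2 * 2 * 2       = 8
 *
 * i.e. an output shape of (N=8, H=2, W=2, C=3).
 */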

/** Calculate the space to depth output shape of a tensor
 *
 * @param[in] input       Input tensor info
 * @param[in] block_shape Block shape value
 *
 * @return the calculated shape
 */
inline TensorShape compute_space_to_depth_shape(const ITensorInfo *input, int32_t block_shape)
{
    TensorShape output_shape{ input->tensor_shape() };

    const DataLayout data_layout = input->data_layout();
    const int        idx_width   = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
    const int        idx_height  = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
    const int        idx_depth   = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);

    output_shape.set(idx_width, input->tensor_shape()[idx_width] / block_shape);
    output_shape.set(idx_height, input->tensor_shape()[idx_height] / block_shape);
    output_shape.set(idx_depth, input->tensor_shape()[idx_depth] * (block_shape * block_shape));

    return output_shape;
}
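
/* Worked example (editor's sketch; values are hypothetical). For an NHWC
 * input of (N=1, H=4, W=4, C=2) and block_shape = 2, every 2x2 spatial block
 * is folded into the channel dimension, giving (N=1, H=2, W=2, C=8).
 */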

/** Calculate the prior box output shape of a tensor
 *
 * @param[in] input Input tensor info
 * @param[in] info  PriorBoxLayer info
 *
 * @return the calculated shape
 */
inline TensorShape compute_prior_box_shape(const ITensorInfo &input, const PriorBoxLayerInfo &info)
{
    DataLayout   data_layout = input.data_layout();
    const size_t idx_w       = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
    const size_t idx_h       = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
    const int    num_priors  = info.aspect_ratios().size() * info.min_sizes().size() + info.max_sizes().size();

    TensorShape output_shape{};
    output_shape.set(0, input.dimension(idx_w) * input.dimension(idx_h) * num_priors * 4);
    output_shape.set(1, 2);

    return output_shape;
}
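
/* Worked example (editor's sketch; all values are hypothetical). With 3
 * aspect ratios, 1 min size and 1 max size, num_priors = 3 * 1 + 1 = 4.
 * For an 8x8 feature map the output shape is (8 * 8 * 4 * 4, 2) =
 * (1024, 2); the factor of 4 per prior accounts for the four box
 * coordinates, and the second dimension separates coordinates from
 * variances, following the usual SSD PriorBox convention.
 */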

/** Calculate the padded shape of a tensor
 *
 * @param[in] input_shape Input tensor shape
 * @param[in] padding     Paddings list
 *
 * @return the calculated shape
 */
inline TensorShape compute_padded_shape(const TensorShape &input_shape, const PaddingList &padding)
{
    TensorShape padded_shape = input_shape;
    for(size_t dim = 0; dim < padding.size(); ++dim)
    {
        const auto    &padding_pair   = padding[dim];
        const uint32_t shape_on_index = (padded_shape.num_dimensions() <= dim) ? 1 : input_shape[dim];
        padded_shape.set(dim, padding_pair.first + shape_on_index + padding_pair.second);
    }
    return padded_shape;
}
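
/* Worked example (editor's sketch). Padding a hypothetical shape of (4, 5)
 * with PaddingList{ { 1, 1 }, { 2, 0 } } gives
 * (1 + 4 + 1, 2 + 5 + 0) = (6, 7); dimensions without a padding entry are
 * left untouched.
 */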

/** Calculate the tiled shape of a tensor
 *
 * @param[in] input_shape Input tensor shape
 * @param[in] multiples   Multiples list: number of repetitions per dimension
 *
 * @return the calculated shape
 */
inline TensorShape compute_tiled_shape(const TensorShape &input_shape, const Multiples &multiples)
{
    TensorShape tiled_shape = input_shape;
    for(size_t dim = 0; dim < multiples.size(); ++dim)
    {
        tiled_shape.set(dim, input_shape[dim] * multiples[dim]);
    }
    return tiled_shape;
}
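
/* Worked example (editor's sketch). Tiling a hypothetical shape of (2, 3)
 * with Multiples{ 2, 3 } repeats the tensor along each dimension, giving
 * (2 * 2, 3 * 3) = (4, 9).
 */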

/** Calculate the reduced shape of a tensor given an axis
 *
 * @param[in] input     Input tensor shape
 * @param[in] axis      Axis on which to perform reduction
 * @param[in] keep_dims (Optional) Whether to keep the dimension after reduction operation. Defaults to true.
 *
 * @return the calculated shape
 */
inline TensorShape compute_reduced_shape(const TensorShape &input, unsigned int axis, bool keep_dims = true)
{
    TensorShape output_shape{ input };

    if(!keep_dims)
    {
        output_shape.remove_dimension(axis);
    }
    else
    {
        output_shape.set(axis, 1);
    }

    return output_shape;
}
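
/* Worked example (editor's sketch). Reducing a hypothetical shape of
 * (3, 4, 5) along axis = 1 yields (3, 1, 5) with keep_dims = true and
 * (3, 5) with keep_dims = false.
 */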

/** Calculate the upsampled shape of a tensor
 *
 * @param[in] input Input tensor info
 * @param[in] info  Contains stride information (x and y)
 *
 * @return the calculated shape
 */
inline TensorShape compute_upsample_shape(const ITensorInfo &input, const Size2D &info)
{
    const DataLayout data_layout = input.data_layout();
    const int        idx_width   = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
    const int        idx_height  = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);

    TensorShape        scale_out_shape(input.tensor_shape());
    const unsigned int out_x = input.dimension(idx_width) * info.x();
    const unsigned int out_y = input.dimension(idx_height) * info.y();
    scale_out_shape.set(idx_width, out_x);
    scale_out_shape.set(idx_height, out_y);

    return scale_out_shape;
}
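
/* Worked example (editor's sketch; values are hypothetical). For an NCHW
 * input of (N=1, C=2, H=5, W=3) and strides info = (2, 2), the output is
 * (N=1, C=2, H=10, W=6): only the spatial dimensions are scaled.
 */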

/** Get the tensor shape
 *
 * @param[in] data Input data
 *
 * @return the extracted tensor shape
 */
template <typename T>
inline TensorShape extract_shape(T *data)
{
    return data->info()->tensor_shape();
}

inline TensorShape extract_shape(ITensorInfo *data)
{
    return data->tensor_shape();
}
inline TensorShape extract_shape(const ITensorInfo *data)
{
    return data->tensor_shape();
}

inline TensorShape extract_shape(const TensorShape *data)
{
    return *data;
}

inline TensorShape extract_shape(TensorShape *data)
{
    return *data;
}

/** Calculate the unstack shape of a tensor
 *
 * @param[in] input_shape Input tensor shape
 * @param[in] axis        Axis on which to perform the unstack operation
 *
 * @return the calculated shape
 */
inline TensorShape calculate_unstack_shape(TensorShape input_shape, unsigned int axis)
{
    ARM_COMPUTE_ERROR_ON(axis > input_shape.num_dimensions());
    input_shape.remove_dimension(axis);
    return input_shape;
}
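
/* Worked example (editor's sketch). Unstacking a hypothetical shape of
 * (3, 4, 5) along axis = 1 removes that dimension, so each of the four
 * resulting tensors has shape (3, 5).
 */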

/** Calculate the output shape of the concatenate operation along a single axis
 *
 * @param[in] input Vector containing the shapes of the inputs
 * @param[in] axis  Axis along which to concatenate the input tensors
 *
 * @return the calculated shape
 */
template <typename T>
inline TensorShape calculate_concatenate_shape(const std::vector<T *> &input, size_t axis)
{
    TensorShape out_shape = extract_shape(input[0]);

#if defined(ARM_COMPUTE_ASSERTS_ENABLED)
    // All dimensions must match except the axis one
    for(unsigned int i = 0; i < MAX_DIMS; ++i)
    {
        if(i == axis)
        {
            continue;
        }

        for(const auto &tensor : input)
        {
            ARM_COMPUTE_ERROR_ON(tensor == nullptr);
            const TensorShape shape = extract_shape(tensor);
            ARM_COMPUTE_ERROR_ON(out_shape[i] != shape[i]);
        }
    }
#endif // defined(ARM_COMPUTE_ASSERTS_ENABLED)

    // Calculate output shape
    size_t new_size = 0;
    for(const auto &tensor : input)
    {
        const TensorShape shape = extract_shape(tensor);
        new_size += shape[axis];
    }

    out_shape.set(axis, new_size);

    return out_shape;
}
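
/* Worked example (editor's sketch; shapes are hypothetical). Concatenating
 * shapes (2, 3) and (2, 5) along axis = 1 gives (2, 3 + 5) = (2, 8). All
 * dimensions other than the concatenation axis must match, which the
 * ARM_COMPUTE_ASSERTS_ENABLED block above checks.
 */
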
/** Calculate the stack output shape of a tensor
 *
 * @param[in] a           Input tensor info
 * @param[in] axis        Axis on which to perform the stack operation
 * @param[in] num_tensors Number of tensors to stack
 *
 * @return the calculated shape
 */
inline TensorShape compute_stack_shape(const ITensorInfo &a, unsigned int axis, unsigned int num_tensors)
{
    ARM_COMPUTE_ERROR_ON(axis > a.num_dimensions());
    ARM_COMPUTE_ERROR_ON(a.num_dimensions() > 4);

    TensorShape shape_out{ a.tensor_shape() };
    shape_out.set(axis, num_tensors);

    unsigned int i_shift = 0;

    for(unsigned int i = 0; i < a.num_dimensions(); ++i)
    {
        if(i == axis)
        {
            i_shift++;
        }

        shape_out.set(i + i_shift, a.tensor_shape()[i]);
    }
    return shape_out;
}
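
/* Worked example (editor's sketch). Stacking num_tensors = 4 tensors of
 * hypothetical shape (2, 3) along axis = 0 inserts a new dimension of
 * extent 4 at index 0 and shifts the remaining dimensions right, giving
 * an output shape of (4, 2, 3).
 */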

/** Calculate the output shape of 3d Convolution
 *
 * @param[in] src         Input tensor shape
 * @param[in] weights     Weights tensor shape
 * @param[in] conv3d_info 3d Convolution Parameters object
 *
 * @return the calculated shape
 */
inline TensorShape compute_conv3d_shape(const TensorShape &src, const TensorShape &weights, const Conv3dInfo &conv3d_info)
{
    // Weight tensor shape indices (D H W Cin Cout)
    constexpr unsigned int weights_depth_dim  = 4u;
    constexpr unsigned int weights_height_dim = 3u;
    constexpr unsigned int weights_width_dim  = 2u;
    constexpr unsigned int weights_CHout_dim  = 0u;

    // Source/Destination Tensor shape indices (N D H W C)
    constexpr unsigned int batch_dim   = 4u;
    constexpr unsigned int depth_dim   = 3u;
    constexpr unsigned int height_dim  = 2u;
    constexpr unsigned int width_dim   = 1u;
    constexpr unsigned int channel_dim = 0u;

    TensorShape  output_shape{ src };
    const size_t pad_left   = conv3d_info.padding.left;
    const size_t pad_right  = conv3d_info.padding.right;
    const size_t pad_top    = conv3d_info.padding.top;
    const size_t pad_bottom = conv3d_info.padding.bottom;
    const size_t pad_front  = conv3d_info.padding.front;
    const size_t pad_back   = conv3d_info.padding.back;
    const size_t dilation_x = conv3d_info.dilation.width;
    const size_t dilation_y = conv3d_info.dilation.height;
    const size_t dilation_z = conv3d_info.dilation.depth;
    const size_t stride_x   = conv3d_info.stride.x();
    const size_t stride_y   = conv3d_info.stride.y();
    const size_t stride_z   = conv3d_info.stride.z();

    int output_width_size  = 0;
    int output_height_size = 0;
    int output_depth_size  = 0;

    switch(conv3d_info.round_type)
    {
        case DimensionRoundingType::FLOOR:
            output_width_size  = static_cast<int>(std::floor((static_cast<float>(src[width_dim] + pad_left + pad_right - (dilation_x * (weights[weights_width_dim] - 1) + 1)) / stride_x) + 1));
            output_height_size = static_cast<int>(std::floor((static_cast<float>(src[height_dim] + pad_top + pad_bottom - (dilation_y * (weights[weights_height_dim] - 1) + 1)) / stride_y) + 1));
            output_depth_size  = static_cast<int>(std::floor((static_cast<float>(src[depth_dim] + pad_front + pad_back - (dilation_z * (weights[weights_depth_dim] - 1) + 1)) / stride_z) + 1));
            break;
        case DimensionRoundingType::CEIL:
            output_width_size  = static_cast<int>(std::ceil((static_cast<float>(src[width_dim] + pad_left + pad_right - (dilation_x * (weights[weights_width_dim] - 1) + 1)) / stride_x) + 1));
            output_height_size = static_cast<int>(std::ceil((static_cast<float>(src[height_dim] + pad_top + pad_bottom - (dilation_y * (weights[weights_height_dim] - 1) + 1)) / stride_y) + 1));
            output_depth_size  = static_cast<int>(std::ceil((static_cast<float>(src[depth_dim] + pad_front + pad_back - (dilation_z * (weights[weights_depth_dim] - 1) + 1)) / stride_z) + 1));
            break;
        default:
            ARM_COMPUTE_ERROR("Unsupported rounding type");
    }

    output_shape.set(batch_dim, src[batch_dim]);
    output_shape.set(width_dim, output_width_size);
    output_shape.set(height_dim, output_height_size);
    output_shape.set(depth_dim, output_depth_size);
    output_shape.set(channel_dim, weights[weights_CHout_dim]);
    return output_shape;
}
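
/* Worked example (editor's sketch; all values are hypothetical). For an
 * NDHWC source of (N=1, D=8, H=8, W=8, Cin=4), weights of
 * (Cout=16, Cin=4, W=3, H=3, D=3), unit strides and dilations, zero padding
 * and FLOOR rounding, each spatial extent becomes
 *
 *   floor((8 + 0 - (1 * (3 - 1) + 1)) / 1) + 1 = 6
 *
 * so the output shape is (N=1, D=6, H=6, W=6, C=16).
 */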

/** Calculate the output pool3d shape of a tensor
 *
 * @param[in] src         Input tensor shape
 * @param[in] pool3d_info Pooling layer info
 *
 * @return the calculated shape
 */
inline TensorShape compute_pool3d_shape(const TensorShape &src, Pooling3dLayerInfo pool3d_info)
{
    TensorShape output_shape{ src };

    const auto data_layout      = DataLayout::NDHWC;
    const int  idx_width        = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
    const int  idx_height       = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
    const int  idx_depth        = get_data_layout_dimension_index(data_layout, DataLayoutDimension::DEPTH);
    const int  pool_size_width  = pool3d_info.is_global_pooling ? src[idx_width] : pool3d_info.pool_size.width;
    const int  pool_size_height = pool3d_info.is_global_pooling ? src[idx_height] : pool3d_info.pool_size.height;
    const int  pool_size_depth  = pool3d_info.is_global_pooling ? src[idx_depth] : pool3d_info.pool_size.depth;
    int        output_width     = 0;
    int        output_height    = 0;
    int        output_depth     = 0;

    std::tie(output_width, output_height, output_depth) = scaled_3d_dimensions_signed(src[idx_width], src[idx_height], src[idx_depth], pool_size_width, pool_size_height,
                                                                                      pool_size_depth, pool3d_info);

    ARM_COMPUTE_ERROR_ON_MSG((output_width < 1 || output_height < 1 || output_depth < 1), "Calculated output dimension size is invalid");

    output_shape.set(idx_width, static_cast<size_t>(output_width));
    output_shape.set(idx_height, static_cast<size_t>(output_height));
    output_shape.set(idx_depth, static_cast<size_t>(output_depth));

    return output_shape;
}
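
/* Worked example (editor's sketch; shapes are hypothetical). With
 * is_global_pooling = true the pool size equals the full spatial extent of
 * the NDHWC input, so a source of (N=1, D=8, H=8, W=8, C=4) reduces to
 * (N=1, D=1, H=1, W=1, C=4).
 */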

/** Calculate the gather output shape of a tensor
 *
 * @param[in] input_shape   Input tensor shape
 * @param[in] indices_shape Indices tensor shape. 1D indices are supported for any axis; 2D and 3D indices only for actual_axis == 1
 * @param[in] actual_axis   Axis to be used in the computation
 *
 * @note Let input_shape be (X,Y,Z), the indices shape (W,O,P) and the axis 1:
 *       the new shape is computed by replacing the axis in the input shape with
 *       the indices shape, so the output shape will be (X,W,O,P,Z)
 *
 * @return the calculated shape
 */
inline TensorShape compute_gather_shape(const TensorShape &input_shape, const TensorShape &indices_shape, uint32_t actual_axis)
{
    ARM_COMPUTE_ERROR_ON(input_shape.num_dimensions() > 4);
    ARM_COMPUTE_ERROR_ON(actual_axis >= input_shape.num_dimensions());
    ARM_COMPUTE_ERROR_ON(indices_shape.num_dimensions() > 3);
    TensorShape output_shape = input_shape;
    if(indices_shape.num_dimensions() == 1u)
    {
        output_shape[actual_axis] = indices_shape[0];
    }
    else
    {
        const auto ind_num_dims
        {
            indices_shape.num_dimensions()
        };
        output_shape.shift_right(ind_num_dims - 1);
        switch(actual_axis)
        {
            case 1:
            {
                output_shape[0] = input_shape[0];
                for(size_t idx = 0; idx < ind_num_dims; ++idx)
                {
                    output_shape.set(actual_axis + idx, indices_shape[idx], false);
                }
                break;
            }
            default:
            {
                // 2d and 3d indices are only supported for axis == 1
                ARM_COMPUTE_ERROR_ON(actual_axis != 1 && indices_shape.num_dimensions() > 1);
            }
        }
    }
    return output_shape;
}
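
/* Worked example (editor's sketch; shapes are hypothetical). Gathering from
 * an input of shape (X=3, Y=4, Z=5) with 1D indices of shape (6) along
 * actual_axis = 1 replaces that extent, giving (3, 6, 5). With 2D indices
 * of shape (W=2, O=6) and actual_axis = 1, the whole indices shape replaces
 * the axis, giving (X, W, O, Z) = (3, 2, 6, 5), matching the note above.
 */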
} // namespace shape_calculator
} // namespace misc
} // namespace arm_compute
#endif /* ARM_COMPUTE_MISC_SHAPE_CALCULATOR_H */