• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2018-2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "src/core/CL/kernels/CLUpsampleLayerKernel.h"
25 
26 #include "arm_compute/core/CL/CLHelpers.h"
27 #include "arm_compute/core/CL/CLKernelLibrary.h"
28 #include "arm_compute/core/CL/ICLTensor.h"
29 #include "arm_compute/core/Error.h"
30 #include "arm_compute/core/Helpers.h"
31 #include "arm_compute/core/Validate.h"
32 #include "arm_compute/core/Window.h"
33 #include "arm_compute/core/utils/misc/ShapeCalculator.h"
34 #include "src/core/AccessWindowStatic.h"
35 #include "src/core/CL/CLValidate.h"
36 #include "src/core/helpers/AutoConfiguration.h"
37 #include "src/core/helpers/WindowHelpers.h"
38 #include "support/StringSupport.h"
39 
40 namespace arm_compute
41 {
CLUpsampleLayerKernel()42 CLUpsampleLayerKernel::CLUpsampleLayerKernel()
43     : _input(nullptr), _output(nullptr), _info(), _data_layout(DataLayout::UNKNOWN), _num_elems_processed_per_iteration_input_x()
44 {
45 }
46 
validate(const ITensorInfo * input,const ITensorInfo * output,const Size2D & info,const InterpolationPolicy upsampling_policy)47 Status CLUpsampleLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &info, const InterpolationPolicy upsampling_policy)
48 {
49     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
50     ARM_COMPUTE_UNUSED(upsampling_policy);
51 
52     DataLayout data_layout = input->data_layout();
53     const int  idx_width   = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
54     const int  idx_height  = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
55 
56     ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
57     ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
58 
59     ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.x() != 2 || info.y() != 2, "Only stride 2 is supported");
60     ARM_COMPUTE_RETURN_ERROR_ON_MSG(upsampling_policy != InterpolationPolicy::NEAREST_NEIGHBOR, "Only nearest neighbor policy supported");
61 
62     if(output->total_size() != 0)
63     {
64         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
65         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output);
66         ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(idx_width) != info.x() * input->dimension(idx_width));
67         ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(idx_height) != info.y() * input->dimension(idx_height));
68         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output);
69     }
70 
71     return Status{};
72 }
73 
configure(const ICLTensor * input,ICLTensor * output,const Size2D & info,const InterpolationPolicy upsampling_policy)74 void CLUpsampleLayerKernel::configure(const ICLTensor *input, ICLTensor *output, const Size2D &info, const InterpolationPolicy upsampling_policy)
75 {
76     configure(CLKernelLibrary::get().get_compile_context(), input, output, info, upsampling_policy);
77 }
78 
configure(const CLCompileContext & compile_context,const ICLTensor * input,ICLTensor * output,const Size2D & info,const InterpolationPolicy upsampling_policy)79 void CLUpsampleLayerKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Size2D &info, const InterpolationPolicy upsampling_policy)
80 {
81     ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
82     ARM_COMPUTE_UNUSED(upsampling_policy);
83 
84     _input                                     = input;
85     _output                                    = output;
86     _info                                      = info;
87     _data_layout                               = input->info()->data_layout();
88     _num_elems_processed_per_iteration_input_x = 1;
89 
90     TensorShape output_shape = misc::shape_calculator::compute_upsample_shape(*input->info(), info);
91     auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type());
92     output->info()->set_data_layout(_data_layout);
93 
94     unsigned int num_elems_processed_per_iteration_x = 16;
95     const int    output_width_x                      = output->info()->dimension(0);
96     const bool   multi_access_x                      = ((output_width_x / num_elems_processed_per_iteration_x) > 0);
97 
98     // Perform validation step
99     ARM_COMPUTE_ERROR_THROW_ON(CLUpsampleLayerKernel::validate(input->info(), output->info(), info, upsampling_policy));
100 
101     Window win{};
102 
103     switch(_data_layout)
104     {
105         case DataLayout::NCHW:
106         {
107             win = calculate_max_window(*output->info());
108             win.set(Window::DimY, Window::Dimension(win.y().start(), win.y().end(), info.y()));
109             if(multi_access_x)
110             {
111                 _num_elems_processed_per_iteration_input_x = num_elems_processed_per_iteration_x / info.x();
112                 win.set(Window::DimX, Window::Dimension(win.x().start(), ceil_to_multiple(win.x().end(), num_elems_processed_per_iteration_x), num_elems_processed_per_iteration_x));
113             }
114             break;
115         }
116         case DataLayout::NHWC:
117         {
118             win = calculate_max_window(*output->info());
119             win.set(Window::DimY, Window::Dimension(win.y().start(), win.y().end(), info.x()));
120             win.set(Window::DimZ, Window::Dimension(win.z().start(), win.z().end(), info.y()));
121             if(multi_access_x)
122             {
123                 _num_elems_processed_per_iteration_input_x = num_elems_processed_per_iteration_x;
124                 win.set(Window::DimX, Window::Dimension(win.x().start(), ceil_to_multiple(win.x().end(),
125                                                                                           num_elems_processed_per_iteration_x),
126                                                         num_elems_processed_per_iteration_x));
127             }
128             break;
129         }
130         default:
131             ARM_COMPUTE_ERROR("Not implemented");
132     }
133 
134     // Create kernel
135     CLBuildOptions build_opts;
136     build_opts.add_option(("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(input->info()->element_size())));
137     build_opts.add_option_if(multi_access_x, "-DVEC_SIZE_IN=" + support::cpp11::to_string(_num_elems_processed_per_iteration_input_x));
138     build_opts.add_option_if(multi_access_x, "-DVEC_SIZE_OUT=" + support::cpp11::to_string(num_elems_processed_per_iteration_x));
139     build_opts.add_option_if(multi_access_x, "-DLAST_ACCESSED_X_IN=" + support::cpp11::to_string(std::max<int>(_input->info()->dimension(0) - _num_elems_processed_per_iteration_input_x, 0)));
140     build_opts.add_option_if(multi_access_x, "-DLAST_ACCESSED_X_OUT=" + support::cpp11::to_string(std::max<int>(output_width_x - num_elems_processed_per_iteration_x, 0)));
141     _kernel = create_kernel(compile_context, "upsample_layer_" + lower_string(string_from_data_layout(input->info()->data_layout())), build_opts.options());
142 
143     ICLKernel::configure_internal(win);
144 }
145 
run(const Window & window,cl::CommandQueue & queue)146 void CLUpsampleLayerKernel::run(const Window &window, cl::CommandQueue &queue)
147 {
148     ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
149     ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
150 
151     Window collapsed_window = window.collapse_if_possible(ICLKernel::window(), Window::DimZ);
152     Window slice_out        = collapsed_window.first_slice_window_3D();
153     Window slice_in         = collapsed_window.first_slice_window_3D();
154 
155     switch(_data_layout)
156     {
157         case DataLayout::NCHW:
158             slice_in.set(Window::DimX, Window::Dimension(0, _input->info()->dimension(0), _num_elems_processed_per_iteration_input_x));
159             slice_in.set(Window::DimY, Window::Dimension(0, _input->info()->dimension(1), 1));
160             break;
161         case DataLayout::NHWC:
162             slice_in.set(Window::DimY, Window::Dimension(0, _input->info()->dimension(1), 1));
163             slice_in.set(Window::DimZ, Window::Dimension(0, _input->info()->dimension(2), 1));
164             break;
165         default:
166             ARM_COMPUTE_ERROR("Not implemented");
167     }
168 
169     do
170     {
171         unsigned int idx = 0;
172         add_3D_tensor_argument(idx, _input, slice_in);
173         add_3D_tensor_argument(idx, _output, slice_out);
174         enqueue(queue, *this, slice_out, lws_hint());
175     }
176     while(collapsed_window.slide_window_slice_3D(slice_out) && collapsed_window.slide_window_slice_3D(slice_in));
177 }
178 } // namespace arm_compute
179