• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2017-2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "src/core/CL/kernels/CLGaussianPyramidKernel.h"
25 
26 #include "arm_compute/core/CL/CLKernelLibrary.h"
27 #include "arm_compute/core/CL/ICLTensor.h"
28 #include "arm_compute/core/Helpers.h"
29 #include "arm_compute/core/Utils.h"
30 #include "arm_compute/core/Validate.h"
31 #include "src/core/helpers/WindowHelpers.h"
32 #include "support/StringSupport.h"
33 
34 using namespace arm_compute;
35 
CLGaussianPyramidHorKernel()36 CLGaussianPyramidHorKernel::CLGaussianPyramidHorKernel()
37     : _l2_load_offset(0)
38 {
39 }
40 
border_size() const41 BorderSize CLGaussianPyramidHorKernel::border_size() const
42 {
43     return BorderSize{ 0, 2 };
44 }
45 
configure(const ICLTensor * input,ICLTensor * output)46 void CLGaussianPyramidHorKernel::configure(const ICLTensor *input, ICLTensor *output)
47 {
48     configure(CLKernelLibrary::get().get_compile_context(), input, output);
49 }
50 
configure(const CLCompileContext & compile_context,const ICLTensor * input,ICLTensor * output)51 void CLGaussianPyramidHorKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
52 {
53     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
54     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U16);
55     ARM_COMPUTE_ERROR_ON(input->info()->dimension(1) != output->info()->dimension(1));
56 
57     for(size_t i = 2; i < Coordinates::num_max_dimensions; ++i)
58     {
59         ARM_COMPUTE_ERROR_ON(input->info()->dimension(i) != output->info()->dimension(i));
60     }
61 
62     _input  = input;
63     _output = output;
64 
65     // Create kernel
66     const std::string kernel_name = std::string("gaussian1x5_sub_x");
67     _kernel                       = create_kernel(compile_context, kernel_name);
68 
69     // Configure kernel window
70     constexpr unsigned int num_elems_processed_per_iteration = 16;
71     constexpr unsigned int num_elems_read_per_iteration      = 20;
72     constexpr unsigned int num_elems_written_per_iteration   = 8;
73     const float            scale_x                           = static_cast<float>(output->info()->dimension(0)) / input->info()->dimension(0);
74 
75     Window                 win = calculate_max_window_horizontal(*input->info(), Steps(num_elems_processed_per_iteration));
76     AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration, scale_x);
77 
78     // Sub sampling selects odd pixels (1, 3, 5, ...) for images with even
79     // width and even pixels (0, 2, 4, ...) for images with odd width. (Whether
80     // a pixel is even or odd is determined based on the tensor shape not the
81     // valid region!)
82     // Thus the offset from which the first pixel (L2) for the convolution is
83     // loaded depends on the anchor and shape of the valid region.
84     // In the case of an even shape (= even image width) we need to load L2
85     // from -2 if the anchor is odd and from -1 if the anchor is even. That
86     // makes sure that L2 is always loaded from an odd pixel.
87     // On the other hand, for an odd shape (= odd image width) we need to load
88     // L2 from -1 if the anchor is odd and from -2 if the anchor is even to
89     // achieve the opposite effect.
90     // The condition can be simplified to checking whether anchor + shape is
91     // odd (-2) or even (-1) as only adding an odd and an even number will have
92     // an odd result.
93     _l2_load_offset = -border_size().left;
94 
95     if((_input->info()->valid_region().anchor[0] + _input->info()->valid_region().shape[0]) % 2 == 0)
96     {
97         _l2_load_offset += 1;
98     }
99 
100     update_window_and_padding(win,
101                               AccessWindowHorizontal(input->info(), _l2_load_offset, num_elems_read_per_iteration),
102                               output_access);
103 
104     output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape()));
105 
106     ICLKernel::configure_internal(win);
107 
108     // Set config_id for enabling LWS tuning
109     _config_id = kernel_name;
110     _config_id += "_";
111     _config_id += lower_string(string_from_data_type(input->info()->data_type()));
112     _config_id += "_";
113     _config_id += support::cpp11::to_string(input->info()->dimension(0));
114     _config_id += "_";
115     _config_id += support::cpp11::to_string(input->info()->dimension(1));
116     _config_id += "_";
117     _config_id += support::cpp11::to_string(output->info()->dimension(0));
118     _config_id += "_";
119     _config_id += support::cpp11::to_string(output->info()->dimension(1));
120 }
121 
run(const Window & window,cl::CommandQueue & queue)122 void CLGaussianPyramidHorKernel::run(const Window &window, cl::CommandQueue &queue)
123 {
124     ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
125     ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
126 
127     Window win_in(window);
128     win_in.shift(Window::DimX, _l2_load_offset);
129 
130     //The output is half the width of the input:
131     Window win_out(window);
132     win_out.scale(Window::DimX, 0.5f);
133 
134     Window slice_in  = win_in.first_slice_window_2D();
135     Window slice_out = win_out.first_slice_window_2D();
136 
137     do
138     {
139         unsigned int idx = 0;
140         add_2D_tensor_argument(idx, _input, slice_in);
141         add_2D_tensor_argument(idx, _output, slice_out);
142         enqueue(queue, *this, slice_out, lws_hint());
143     }
144     while(win_in.slide_window_slice_2D(slice_in) && win_out.slide_window_slice_2D(slice_out));
145 }
146 
CLGaussianPyramidVertKernel()147 CLGaussianPyramidVertKernel::CLGaussianPyramidVertKernel()
148     : _t2_load_offset(0)
149 {
150 }
151 
border_size() const152 BorderSize CLGaussianPyramidVertKernel::border_size() const
153 {
154     return BorderSize{ 2, 0 };
155 }
156 
configure(const ICLTensor * input,ICLTensor * output)157 void CLGaussianPyramidVertKernel::configure(const ICLTensor *input, ICLTensor *output)
158 {
159     configure(CLKernelLibrary::get().get_compile_context(), input, output);
160 }
161 
configure(const CLCompileContext & compile_context,const ICLTensor * input,ICLTensor * output)162 void CLGaussianPyramidVertKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
163 {
164     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U16);
165     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
166     ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != output->info()->dimension(0));
167 
168     for(size_t i = 2; i < Coordinates::num_max_dimensions; ++i)
169     {
170         ARM_COMPUTE_ERROR_ON(input->info()->dimension(i) != output->info()->dimension(i));
171     }
172 
173     _input  = input;
174     _output = output;
175 
176     // Create kernel
177     const std::string kernel_name = std::string("gaussian5x1_sub_y");
178     _kernel                       = create_kernel(compile_context, "gaussian5x1_sub_y");
179 
180     // Configure kernel window
181     constexpr unsigned int num_elems_processed_per_iteration = 8;
182     constexpr unsigned int num_rows_processed_per_iteration  = 2;
183     constexpr unsigned int num_elems_written_per_iteration   = 8;
184     constexpr unsigned int num_elems_read_per_iteration      = 8;
185     constexpr unsigned int num_rows_per_iteration            = 5;
186 
187     const float scale_y = static_cast<float>(output->info()->dimension(1)) / input->info()->dimension(1);
188 
189     Window                win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration, num_rows_processed_per_iteration));
190     AccessWindowRectangle output_access(output->info(), 0, 0, num_elems_written_per_iteration, num_rows_per_iteration, 1.f, scale_y);
191 
192     // Determine whether we need to load even or odd rows. See above for a
193     // detailed explanation.
194     _t2_load_offset = -border_size().top;
195 
196     if((_input->info()->valid_region().anchor[1] + _input->info()->valid_region().shape[1]) % 2 == 0)
197     {
198         _t2_load_offset += 1;
199     }
200 
201     update_window_and_padding(win,
202                               AccessWindowRectangle(input->info(), 0, _t2_load_offset, num_elems_read_per_iteration, num_rows_per_iteration),
203                               output_access);
204 
205     output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape()));
206 
207     ICLKernel::configure_internal(win);
208 
209     // Set config_id for enabling LWS tuning
210     _config_id = kernel_name;
211     _config_id += "_";
212     _config_id += lower_string(string_from_data_type(input->info()->data_type()));
213     _config_id += "_";
214     _config_id += support::cpp11::to_string(input->info()->dimension(0));
215     _config_id += "_";
216     _config_id += support::cpp11::to_string(input->info()->dimension(1));
217     _config_id += "_";
218     _config_id += support::cpp11::to_string(output->info()->dimension(0));
219     _config_id += "_";
220     _config_id += support::cpp11::to_string(output->info()->dimension(1));
221 }
222 
run(const Window & window,cl::CommandQueue & queue)223 void CLGaussianPyramidVertKernel::run(const Window &window, cl::CommandQueue &queue)
224 {
225     ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
226     ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
227     ARM_COMPUTE_ERROR_ON(window.x().step() != 8);
228     ARM_COMPUTE_ERROR_ON(window.y().step() % 2);
229 
230     Window win_in(window);
231     win_in.shift(Window::DimY, _t2_load_offset);
232 
233     Window win_out(window);
234     win_out.scale(Window::DimY, 0.5f);
235 
236     Window slice_in  = win_in.first_slice_window_2D();
237     Window slice_out = win_out.first_slice_window_2D();
238 
239     do
240     {
241         unsigned int idx = 0;
242         add_2D_tensor_argument(idx, _input, slice_in);
243         add_2D_tensor_argument(idx, _output, slice_out);
244         enqueue(queue, *this, slice_out, lws_hint());
245     }
246     while(win_in.slide_window_slice_2D(slice_in) && win_out.slide_window_slice_2D(slice_out));
247 }
248