• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2017-2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "src/core/CL/kernels/CLLKTrackerKernel.h"
25 
26 #include "arm_compute/core/CL/CLKernelLibrary.h"
27 #include "arm_compute/core/CL/ICLArray.h"
28 #include "arm_compute/core/CL/ICLTensor.h"
29 #include "arm_compute/core/Coordinates.h"
30 #include "arm_compute/core/Helpers.h"
31 #include "arm_compute/core/TensorInfo.h"
32 #include "arm_compute/core/Validate.h"
33 #include "src/core/AccessWindowStatic.h"
34 #include "src/core/helpers/WindowHelpers.h"
35 
36 #include <cmath>
37 
38 using namespace arm_compute;
39 
configure(const ICLKeyPointArray * old_points,const ICLKeyPointArray * new_points_estimates,ICLLKInternalKeypointArray * old_points_internal,ICLLKInternalKeypointArray * new_points_internal,bool use_initial_estimate,size_t level,size_t num_levels,float pyramid_scale)40 void CLLKTrackerInitKernel::configure(const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates,
41                                       ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal,
42                                       bool use_initial_estimate, size_t level, size_t num_levels, float pyramid_scale)
43 {
44     configure(CLKernelLibrary::get().get_compile_context(), old_points, new_points_estimates, old_points_internal, new_points_internal, use_initial_estimate, level, num_levels, pyramid_scale);
45 }
46 
configure(const CLCompileContext & compile_context,const ICLKeyPointArray * old_points,const ICLKeyPointArray * new_points_estimates,ICLLKInternalKeypointArray * old_points_internal,ICLLKInternalKeypointArray * new_points_internal,bool use_initial_estimate,size_t level,size_t num_levels,float pyramid_scale)47 void CLLKTrackerInitKernel::configure(const CLCompileContext &compile_context, const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates,
48                                       ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal,
49                                       bool use_initial_estimate, size_t level, size_t num_levels, float pyramid_scale)
50 
51 {
52     ARM_COMPUTE_ERROR_ON(old_points == nullptr);
53     ARM_COMPUTE_ERROR_ON(old_points_internal == nullptr);
54     ARM_COMPUTE_ERROR_ON(new_points_internal == nullptr);
55 
56     const float scale = std::pow(pyramid_scale, level);
57 
58     // Create kernel
59     std::string kernel_name = "init_level";
60     if(level == (num_levels - 1))
61     {
62         kernel_name += (use_initial_estimate) ? std::string("_max_initial_estimate") : std::string("_max");
63     }
64     _kernel = create_kernel(compile_context, kernel_name);
65 
66     // Set static kernel arguments
67     unsigned int idx = 0;
68     if(level == (num_levels - 1))
69     {
70         _kernel.setArg(idx++, old_points->cl_buffer());
71         if(use_initial_estimate)
72         {
73             _kernel.setArg(idx++, new_points_estimates->cl_buffer());
74         }
75     }
76     _kernel.setArg(idx++, old_points_internal->cl_buffer());
77     _kernel.setArg(idx++, new_points_internal->cl_buffer());
78     _kernel.setArg<cl_float>(idx++, scale);
79 
80     // Configure kernel window
81     Window window;
82     window.set(Window::DimX, Window::Dimension(0, old_points->num_values(), 1));
83     window.set(Window::DimY, Window::Dimension(0, 1, 1));
84     ICLKernel::configure_internal(window);
85 }
86 
run(const Window & window,cl::CommandQueue & queue)87 void CLLKTrackerInitKernel::run(const Window &window, cl::CommandQueue &queue)
88 {
89     ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
90     ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
91 
92     enqueue(queue, *this, window, lws_hint());
93 }
94 
configure(ICLLKInternalKeypointArray * new_points_internal,ICLKeyPointArray * new_points)95 void CLLKTrackerFinalizeKernel::configure(ICLLKInternalKeypointArray *new_points_internal, ICLKeyPointArray *new_points)
96 {
97     configure(CLKernelLibrary::get().get_compile_context(), new_points_internal, new_points);
98 }
99 
configure(const CLCompileContext & compile_context,ICLLKInternalKeypointArray * new_points_internal,ICLKeyPointArray * new_points)100 void CLLKTrackerFinalizeKernel::configure(const CLCompileContext &compile_context, ICLLKInternalKeypointArray *new_points_internal, ICLKeyPointArray *new_points)
101 
102 {
103     ARM_COMPUTE_ERROR_ON(new_points_internal == nullptr);
104     ARM_COMPUTE_ERROR_ON(new_points == nullptr);
105 
106     // Create kernel
107     _kernel = create_kernel(compile_context, "finalize");
108 
109     // Set static kernel arguments
110     unsigned int idx = 0;
111     _kernel.setArg(idx++, new_points_internal->cl_buffer());
112     _kernel.setArg(idx++, new_points->cl_buffer());
113 
114     // Configure kernel window
115     Window window;
116     window.set(Window::DimX, Window::Dimension(0, new_points_internal->num_values(), 1));
117     window.set(Window::DimY, Window::Dimension(0, 1, 1));
118     ICLKernel::configure_internal(window);
119 }
120 
run(const Window & window,cl::CommandQueue & queue)121 void CLLKTrackerFinalizeKernel::run(const Window &window, cl::CommandQueue &queue)
122 {
123     ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
124     ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
125 
126     enqueue(queue, *this, window, lws_hint());
127 }
128 
CLLKTrackerStage0Kernel()129 CLLKTrackerStage0Kernel::CLLKTrackerStage0Kernel()
130     : _old_input(nullptr), _old_scharr_gx(nullptr), _old_scharr_gy(nullptr)
131 {
132 }
133 
configure(const ICLTensor * old_input,const ICLTensor * old_scharr_gx,const ICLTensor * old_scharr_gy,ICLLKInternalKeypointArray * old_points_internal,ICLLKInternalKeypointArray * new_points_internal,ICLCoefficientTableArray * coeff_table,ICLOldValArray * old_ival,size_t window_dimension,size_t level)134 void CLLKTrackerStage0Kernel::configure(const ICLTensor *old_input, const ICLTensor *old_scharr_gx, const ICLTensor *old_scharr_gy,
135                                         ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal,
136                                         ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival,
137                                         size_t window_dimension, size_t level)
138 {
139     configure(CLKernelLibrary::get().get_compile_context(), old_input, old_scharr_gx, old_scharr_gy, old_points_internal, new_points_internal, coeff_table, old_ival, window_dimension, level);
140 }
141 
configure(const CLCompileContext & compile_context,const ICLTensor * old_input,const ICLTensor * old_scharr_gx,const ICLTensor * old_scharr_gy,ICLLKInternalKeypointArray * old_points_internal,ICLLKInternalKeypointArray * new_points_internal,ICLCoefficientTableArray * coeff_table,ICLOldValArray * old_ival,size_t window_dimension,size_t level)142 void CLLKTrackerStage0Kernel::configure(const CLCompileContext &compile_context, const ICLTensor *old_input, const ICLTensor *old_scharr_gx, const ICLTensor *old_scharr_gy,
143                                         ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal,
144                                         ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival,
145                                         size_t window_dimension, size_t level)
146 
147 {
148     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(old_input, 1, DataType::U8);
149     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(old_scharr_gx, 1, DataType::S16);
150     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(old_scharr_gy, 1, DataType::S16);
151     ARM_COMPUTE_ERROR_ON(old_points_internal == nullptr);
152     ARM_COMPUTE_ERROR_ON(new_points_internal == nullptr);
153     ARM_COMPUTE_ERROR_ON(coeff_table == nullptr);
154     ARM_COMPUTE_ERROR_ON(old_ival == nullptr);
155 
156     _old_input     = old_input;
157     _old_scharr_gx = old_scharr_gx;
158     _old_scharr_gy = old_scharr_gy;
159 
160     // Configure kernel window
161     Window window;
162     window.set(Window::DimX, Window::Dimension(0, new_points_internal->num_values(), 1));
163     window.set(Window::DimY, Window::Dimension(0, 1, 1));
164 
165     const ValidRegion valid_region = intersect_valid_regions(
166                                          old_input->info()->valid_region(),
167                                          old_scharr_gx->info()->valid_region(),
168                                          old_scharr_gy->info()->valid_region());
169 
170     update_window_and_padding(window,
171                               AccessWindowStatic(old_input->info(), valid_region.start(0), valid_region.start(1),
172                                                  valid_region.end(0), valid_region.end(1)),
173                               AccessWindowStatic(old_scharr_gx->info(), valid_region.start(0), valid_region.start(1),
174                                                  valid_region.end(0), valid_region.end(1)),
175                               AccessWindowStatic(old_scharr_gy->info(), valid_region.start(0), valid_region.start(1),
176                                                  valid_region.end(0), valid_region.end(1)));
177 
178     ICLKernel::configure_internal(window);
179 
180     // Initialize required variables
181     const int       level0              = (level == 0) ? 1 : 0;
182     const int       window_size         = window_dimension;
183     const int       window_size_squared = window_dimension * window_dimension;
184     const int       window_size_half    = window_dimension / 2;
185     const float     eig_const           = 1.0f / (2.0f * window_size_squared);
186     const cl_float3 border_limits =
187     {
188         {
189             // -1 because we load 2 values at once for bilinear interpolation
190             static_cast<cl_float>(valid_region.end(0) - window_size - 1),
191             static_cast<cl_float>(valid_region.end(1) - window_size - 1),
192             static_cast<cl_float>(valid_region.start(0))
193         }
194     };
195 
196     // Create kernel
197     _kernel = create_kernel(compile_context, "lktracker_stage0");
198 
199     // Set arguments
200     unsigned int idx = 3 * num_arguments_per_2D_tensor();
201     _kernel.setArg(idx++, old_points_internal->cl_buffer());
202     _kernel.setArg(idx++, new_points_internal->cl_buffer());
203     _kernel.setArg(idx++, coeff_table->cl_buffer());
204     _kernel.setArg(idx++, old_ival->cl_buffer());
205     _kernel.setArg<cl_int>(idx++, window_size);
206     _kernel.setArg<cl_int>(idx++, window_size_squared);
207     _kernel.setArg<cl_int>(idx++, window_size_half);
208     _kernel.setArg<cl_float3>(idx++, border_limits);
209     _kernel.setArg<cl_float>(idx++, eig_const);
210     _kernel.setArg<cl_int>(idx++, level0);
211 }
212 
run(const Window & window,cl::CommandQueue & queue)213 void CLLKTrackerStage0Kernel::run(const Window &window, cl::CommandQueue &queue)
214 {
215     ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
216     ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
217 
218     // Set static tensor arguments. Setting here as allocation might be deferred.
219     unsigned int idx = 0;
220     add_2D_tensor_argument(idx, _old_input, window);
221     add_2D_tensor_argument(idx, _old_scharr_gx, window);
222     add_2D_tensor_argument(idx, _old_scharr_gy, window);
223 
224     enqueue(queue, *this, window, lws_hint());
225 }
226 
CLLKTrackerStage1Kernel()227 CLLKTrackerStage1Kernel::CLLKTrackerStage1Kernel()
228     : _new_input(nullptr)
229 {
230 }
231 
configure(const ICLTensor * new_input,ICLLKInternalKeypointArray * new_points_internal,ICLCoefficientTableArray * coeff_table,ICLOldValArray * old_ival,Termination termination,float epsilon,size_t num_iterations,size_t window_dimension,size_t level)232 void CLLKTrackerStage1Kernel::configure(const ICLTensor *new_input, ICLLKInternalKeypointArray *new_points_internal, ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival,
233                                         Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, size_t level)
234 {
235     configure(CLKernelLibrary::get().get_compile_context(), new_input, new_points_internal, coeff_table, old_ival, termination, epsilon, num_iterations, window_dimension, level);
236 }
237 
configure(const CLCompileContext & compile_context,const ICLTensor * new_input,ICLLKInternalKeypointArray * new_points_internal,ICLCoefficientTableArray * coeff_table,ICLOldValArray * old_ival,Termination termination,float epsilon,size_t num_iterations,size_t window_dimension,size_t level)238 void CLLKTrackerStage1Kernel::configure(const CLCompileContext &compile_context, const ICLTensor *new_input, ICLLKInternalKeypointArray *new_points_internal, ICLCoefficientTableArray *coeff_table,
239                                         ICLOldValArray *old_ival,
240                                         Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, size_t level)
241 
242 {
243     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(new_input, 1, DataType::U8);
244     ARM_COMPUTE_ERROR_ON(new_points_internal == nullptr);
245     ARM_COMPUTE_ERROR_ON(coeff_table == nullptr);
246     ARM_COMPUTE_ERROR_ON(old_ival == nullptr);
247 
248     _new_input = new_input;
249 
250     // Configure kernel window
251     Window window;
252     window.set(Window::DimX, Window::Dimension(0, new_points_internal->num_values(), 1));
253     window.set(Window::DimY, Window::Dimension(0, 1, 1));
254 
255     const ValidRegion &valid_region = new_input->info()->valid_region();
256 
257     update_window_and_padding(window,
258                               AccessWindowStatic(new_input->info(), valid_region.start(0), valid_region.start(1),
259                                                  valid_region.end(0), valid_region.end(1)));
260 
261     ICLKernel::configure_internal(window);
262 
263     // Initialize required variables
264     const int       level0              = (level == 0) ? 1 : 0;
265     const int       window_size         = window_dimension;
266     const int       window_size_squared = window_dimension * window_dimension;
267     const int       window_size_half    = window_dimension / 2;
268     const float     eig_const           = 1.0f / (2.0f * window_size_squared);
269     const cl_float3 border_limits =
270     {
271         {
272             // -1 because we load 2 values at once for bilinear interpolation
273             static_cast<cl_float>(valid_region.end(0) - window_size - 1),
274             static_cast<cl_float>(valid_region.end(1) - window_size - 1),
275             static_cast<cl_float>(valid_region.start(0))
276         }
277     };
278 
279     // Set maximum number of iterations used for convergence
280     const size_t max_iterations = 1000;
281     num_iterations              = (termination == Termination::TERM_CRITERIA_EPSILON) ? max_iterations : num_iterations;
282 
283     const int term_epsilon = (termination == Termination::TERM_CRITERIA_EPSILON || termination == Termination::TERM_CRITERIA_BOTH) ? 1 : 0;
284 
285     // Create kernel
286     _kernel = create_kernel(compile_context, "lktracker_stage1");
287 
288     // Set static kernel arguments
289     unsigned int idx = num_arguments_per_2D_tensor();
290     _kernel.setArg(idx++, new_points_internal->cl_buffer());
291     _kernel.setArg(idx++, coeff_table->cl_buffer());
292     _kernel.setArg(idx++, old_ival->cl_buffer());
293     _kernel.setArg<cl_int>(idx++, window_size);
294     _kernel.setArg<cl_int>(idx++, window_size_squared);
295     _kernel.setArg<cl_int>(idx++, window_size_half);
296     _kernel.setArg<cl_int>(idx++, num_iterations);
297     _kernel.setArg<cl_float>(idx++, epsilon);
298     _kernel.setArg<cl_float3>(idx++, border_limits);
299     _kernel.setArg<cl_float>(idx++, eig_const);
300     _kernel.setArg<cl_int>(idx++, level0);
301     _kernel.setArg<cl_int>(idx++, term_epsilon);
302 }
303 
run(const Window & window,cl::CommandQueue & queue)304 void CLLKTrackerStage1Kernel::run(const Window &window, cl::CommandQueue &queue)
305 {
306     ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
307     ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
308 
309     // Set static tensor arguments. Setting here as allocation might be deferred.
310     unsigned int idx = 0;
311     add_2D_tensor_argument(idx, _new_input, window);
312 
313     enqueue(queue, *this, window, lws_hint());
314 }
315