• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2016-2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "src/core/NEON/kernels/NEWarpKernel.h"
25 
26 #include "arm_compute/core/Coordinates.h"
27 #include "arm_compute/core/Error.h"
28 #include "arm_compute/core/Helpers.h"
29 #include "arm_compute/core/ITensor.h"
30 #include "arm_compute/core/TensorInfo.h"
31 #include "arm_compute/core/Validate.h"
32 #include "arm_compute/core/Window.h"
33 #include "src/core/AccessWindowStatic.h"
34 #include "src/core/helpers/AutoConfiguration.h"
35 #include "src/core/helpers/ScaleHelpers.h"
36 #include "src/core/helpers/WindowHelpers.h"
37 
38 #include <cstddef>
39 
40 using namespace arm_compute;
41 
42 namespace
43 {
nearest_interpolation(const uint8_t * in_ptr,int x,int y,size_t stride)44 inline uint8_t nearest_interpolation(const uint8_t *in_ptr, int x, int y, size_t stride)
45 {
46     return in_ptr[x + y * stride];
47 }
48 } // namespace
49 
INEWarpKernel()50 INEWarpKernel::INEWarpKernel()
51     : _func(nullptr), _input(nullptr), _output(nullptr), _constant_border_value(0), _matrix()
52 {
53 }
54 
border_size() const55 BorderSize INEWarpKernel::border_size() const
56 {
57     return BorderSize(1);
58 }
59 
run(const Window & window,const ThreadInfo & info)60 void INEWarpKernel::run(const Window &window, const ThreadInfo &info)
61 {
62     ARM_COMPUTE_UNUSED(info);
63     ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
64     ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
65     ARM_COMPUTE_ERROR_ON(_func == nullptr);
66 
67     (this->*_func)(window);
68 }
69 
configure(const ITensor * input,ITensor * output,const std::array<float,9> & matrix,BorderMode border_mode,uint8_t constant_border_value)70 void INEWarpKernel::configure(const ITensor *input, ITensor *output, const std::array<float, 9> &matrix, BorderMode border_mode, uint8_t constant_border_value)
71 {
72     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
73     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
74 
75     _matrix                = matrix;
76     _constant_border_value = constant_border_value;
77 
78     switch(border_mode)
79     {
80         case BorderMode::UNDEFINED:
81             _func = &INEWarpKernel::warp_undefined;
82             break;
83         case BorderMode::CONSTANT:
84             _func = &INEWarpKernel::warp_constant;
85             break;
86         case BorderMode::REPLICATE:
87             _func = &INEWarpKernel::warp_replicate;
88             break;
89         default:
90             ARM_COMPUTE_ERROR("Border mode not supported");
91             break;
92     }
93 
94     _input  = input;
95     _output = output;
96 
97     // Configure kernel window
98     Window win = calculate_max_window(*output->info(), Steps(1U));
99 
100     const ValidRegion &input_valid_region = input->info()->valid_region();
101 
102     // Reads can occur within the valid region of the input
103     AccessWindowStatic input_access(input->info(),
104                                     input_valid_region.anchor[0] - border_size().left, input_valid_region.anchor[1] - border_size().top,
105                                     input_valid_region.anchor[0] + input_valid_region.shape[0] + border_size().right,
106                                     input_valid_region.anchor[1] + input_valid_region.shape[1] + border_size().bottom);
107     AccessWindowHorizontal output_access(output->info(), 0, 1);
108 
109     update_window_and_padding(win, input_access, output_access);
110 
111     output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
112 
113     INEKernel::configure(win);
114 }
115 
116 template <InterpolationPolicy interpolation>
warp_undefined(const Window & window)117 void NEWarpAffineKernel<interpolation>::warp_undefined(const Window &window)
118 {
119     // Don't increment in X and Y direction for the input tensor
120     // A pointer to the start of this plane is needed as base for the precomputed offsets
121     Window win_in(window);
122     win_in.set(Window::DimX, Window::Dimension(0, 0, 0));
123     win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
124 
125     Iterator in(_input, win_in);
126     Iterator out(_output, window);
127 
128     const int    min_x  = _input->info()->valid_region().anchor[0];
129     const int    max_x  = min_x + _input->info()->valid_region().shape[0];
130     const int    min_y  = _input->info()->valid_region().anchor[1];
131     const int    max_y  = min_y + _input->info()->valid_region().shape[1];
132     const size_t stride = _input->info()->strides_in_bytes()[1];
133 
134     // x0 = M01 * x + M01 * y + M02
135     // y0 = M11 * x + M11 * y + M12
136     const float M00 = _matrix[0];
137     const float M10 = _matrix[1];
138     const float M01 = _matrix[0 + 1 * 2];
139     const float M11 = _matrix[1 + 1 * 2];
140     const float M02 = _matrix[0 + 2 * 2];
141     const float M12 = _matrix[1 + 2 * 2];
142 
143     // "M00 * x" and "M10 * x", when x = window.x.start
144     const float start_x0 = M00 * window.x().start();
145     const float start_y0 = M10 * window.x().start();
146 
147     // Current row
148     int y_cur  = window.y().start();
149     int z_cur  = window.z().start();
150     int d3_cur = window[3].start();
151     int d4_cur = window[4].start();
152     int d5_cur = window[5].start();
153 
154     // const_x0 and const_y0 are the constant parts of x0 and y0 during the row processing
155     float const_x0 = M01 * y_cur + M02;
156     float const_y0 = M11 * y_cur + M12;
157 
158     // Affine warp coordinates
159     float x0 = start_x0 + const_x0;
160     float y0 = start_y0 + const_y0;
161 
162     execute_window_loop(window, [&](const Coordinates & id)
163     {
164         // Check if we are processing a new row. If so, update the current processed row (y_cur), x0, y0 and z0
165         if((y_cur != id.y()) || (z_cur != id.z()) || (d3_cur != id[3]) || (d4_cur != id[4]) || (d5_cur != id[5]))
166         {
167             y_cur  = id.y();
168             z_cur  = id.z();
169             d3_cur = id[3];
170             d4_cur = id[4];
171             d5_cur = id[5];
172 
173             const_x0 = M01 * y_cur + M02;
174             const_y0 = M11 * y_cur + M12;
175 
176             x0 = start_x0 + const_x0;
177             y0 = start_y0 + const_y0;
178         }
179 
180         // Only write to output if x0 and y0 are within the valid region.
181         // Otherwise the read value would be undefined.
182         if((min_y <= y0) && (y0 < max_y) && (min_x <= x0) && (x0 < max_x))
183         {
184             switch(interpolation)
185             {
186                 case InterpolationPolicy::NEAREST_NEIGHBOR:
187                     *out.ptr() = nearest_interpolation(in.ptr(), x0, y0, stride);
188                     break;
189                 case InterpolationPolicy::BILINEAR:
190                     *out.ptr() = scale_helpers::pixel_bilinear_c1(in.ptr(), stride, x0, y0);
191                     break;
192                 default:
193                     ARM_COMPUTE_ERROR("Interpolation not supported");
194             }
195         }
196 
197         x0 += M00;
198         y0 += M10;
199     },
200     in, out);
201 }
202 
203 template <InterpolationPolicy interpolation>
warp_constant(const Window & window)204 void NEWarpAffineKernel<interpolation>::warp_constant(const Window &window)
205 {
206     // Don't increment in X and Y direction for the input tensor
207     // A pointer to the start of this plane is needed as base for the precomputed offsets
208     Window win_in(window);
209     win_in.set(Window::DimX, Window::Dimension(0, 0, 0));
210     win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
211 
212     Iterator in(_input, win_in);
213     Iterator out(_output, window);
214 
215     const int    min_x  = _input->info()->valid_region().anchor[0];
216     const int    max_x  = min_x + _input->info()->valid_region().shape[0];
217     const int    min_y  = _input->info()->valid_region().anchor[1];
218     const int    max_y  = min_y + _input->info()->valid_region().shape[1];
219     const size_t stride = _input->info()->strides_in_bytes()[1];
220 
221     // x0 = M01 * x + M01 * y + M02
222     // y0 = M11 * x + M11 * y + M12
223     const float M00 = _matrix[0];
224     const float M10 = _matrix[1];
225     const float M01 = _matrix[0 + 1 * 2];
226     const float M11 = _matrix[1 + 1 * 2];
227     const float M02 = _matrix[0 + 2 * 2];
228     const float M12 = _matrix[1 + 2 * 2];
229 
230     // "M00 * x" and "M10 * x", when x = window.x.start
231     const float start_x0 = M00 * window.x().start();
232     const float start_y0 = M10 * window.x().start();
233 
234     // Current row
235     int y_cur  = window.y().start();
236     int z_cur  = window.z().start();
237     int d3_cur = window[3].start();
238     int d4_cur = window[4].start();
239     int d5_cur = window[5].start();
240 
241     // const_x0 and const_y0 are the constant parts of x0 and y0 during the row processing
242     float const_x0 = M01 * y_cur + M02;
243     float const_y0 = M11 * y_cur + M12;
244 
245     // Affine warp coordinates
246     float x0 = start_x0 + const_x0;
247     float y0 = start_y0 + const_y0;
248 
249     execute_window_loop(window, [&](const Coordinates & id)
250     {
251         // Check if we are processing a new row. If so, update the current processed row (y_cur), x0, y0 and z0
252         if((y_cur != id.y()) || (z_cur != id.z()) || (d3_cur != id[3]) || (d4_cur != id[4]) || (d5_cur != id[5]))
253         {
254             y_cur  = id.y();
255             z_cur  = id.z();
256             d3_cur = id[3];
257             d4_cur = id[4];
258             d5_cur = id[5];
259 
260             const_x0 = M01 * y_cur + M02;
261             const_y0 = M11 * y_cur + M12;
262 
263             x0 = start_x0 + const_x0;
264             y0 = start_y0 + const_y0;
265         }
266 
267         // Only use input values if x0 and y0 are within the valid region.
268         // Otherwise write the constant border value.
269         if((min_y <= y0) && (y0 < max_y) && (min_x <= x0) && (x0 < max_x))
270         {
271             switch(interpolation)
272             {
273                 case InterpolationPolicy::NEAREST_NEIGHBOR:
274                     *out.ptr() = nearest_interpolation(in.ptr(), x0, y0, stride);
275                     break;
276                 case InterpolationPolicy::BILINEAR:
277                     *out.ptr() = scale_helpers::pixel_bilinear_c1(in.ptr(), stride, x0, y0);
278                     break;
279                 default:
280                     ARM_COMPUTE_ERROR("Interpolation not supported");
281             }
282         }
283         else
284         {
285             switch(interpolation)
286             {
287                 case InterpolationPolicy::NEAREST_NEIGHBOR:
288                     *out.ptr() = _constant_border_value;
289                     break;
290                 case InterpolationPolicy::BILINEAR:
291                 {
292                     const auto xi   = utility::clamp<int>(std::floor(x0), min_x - 1, max_x);
293                     const auto yi   = utility::clamp<int>(std::floor(y0), min_y - 1, max_y);
294                     const auto xi_1 = utility::clamp<int>(std::floor(x0 + 1), min_x - 1, max_x);
295                     const auto yi_1 = utility::clamp<int>(std::floor(y0 + 1), min_y - 1, max_y);
296 
297                     const float dx  = x0 - std::floor(x0);
298                     const float dy  = y0 - std::floor(y0);
299                     const float dx1 = 1.0f - dx;
300                     const float dy1 = 1.0f - dy;
301 
302                     const float a00 = *(in.ptr() + xi + yi * stride);
303                     const float a01 = *(in.ptr() + xi_1 + yi * stride);
304                     const float a10 = *(in.ptr() + xi + yi_1 * stride);
305                     const float a11 = *(in.ptr() + xi_1 + yi_1 * stride);
306 
307                     *out.ptr() = a00 * (dx1 * dy1) + a01 * (dx * dy1) + a10 * (dx1 * dy) + a11 * (dx * dy);
308                 }
309                 break;
310                 default:
311                     ARM_COMPUTE_ERROR("Interpolation not supported");
312             }
313         }
314 
315         x0 += M00;
316         y0 += M10;
317     },
318     in, out);
319 }
320 
321 template <InterpolationPolicy interpolation>
warp_replicate(const Window & window)322 void NEWarpAffineKernel<interpolation>::warp_replicate(const Window &window)
323 {
324     // Don't increment in X and Y direction for the input tensor
325     // A pointer to the start of this plane is needed as base for the precomputed offsets
326     Window win_in(window);
327     win_in.set(Window::DimX, Window::Dimension(0, 0, 0));
328     win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
329 
330     Iterator in(_input, win_in);
331     Iterator out(_output, window);
332 
333     const int    min_x  = _input->info()->valid_region().anchor[0];
334     const int    max_x  = min_x + _input->info()->valid_region().shape[0];
335     const int    min_y  = _input->info()->valid_region().anchor[1];
336     const int    max_y  = min_y + _input->info()->valid_region().shape[1];
337     const size_t stride = _input->info()->strides_in_bytes()[1];
338 
339     // Current row
340     int y_cur  = window.y().start();
341     int z_cur  = window.z().start();
342     int d3_cur = window[3].start();
343     int d4_cur = window[4].start();
344     int d5_cur = window[5].start();
345 
346     const float M00 = _matrix[0];
347     const float M10 = _matrix[1];
348     const float M01 = _matrix[0 + 1 * 2];
349     const float M11 = _matrix[1 + 1 * 2];
350     const float M02 = _matrix[0 + 2 * 2];
351     const float M12 = _matrix[1 + 2 * 2];
352 
353     // "M00 * x" and "M10 * x", when x = window.x.start
354     const float start_x0 = M00 * window.x().start();
355     const float start_y0 = M10 * window.x().start();
356 
357     // const_x0 and const_y0 are the constant parts of x0 and y0 during the row processing
358     float const_x0 = M01 * y_cur + M02;
359     float const_y0 = M11 * y_cur + M12;
360 
361     float x0 = start_x0 + const_x0;
362     float y0 = start_y0 + const_y0;
363 
364     execute_window_loop(window, [&](const Coordinates & id)
365     {
366         // Check if we are processing a new row. If so, update the current processed row (y_cur), x0, y0 and z0
367         if((y_cur != id.y()) || (z_cur != id.z()) || (d3_cur != id[3]) || (d4_cur != id[4]) || (d5_cur != id[5]))
368         {
369             y_cur  = id.y();
370             z_cur  = id.z();
371             d3_cur = id[3];
372             d4_cur = id[4];
373             d5_cur = id[5];
374 
375             const_x0 = M01 * y_cur + M02;
376             const_y0 = M11 * y_cur + M12;
377 
378             x0 = start_x0 + const_x0;
379             y0 = start_y0 + const_y0;
380         }
381 
382         // Only load from (x0, y0) if the point is within the valid region.
383         // Otherwise load from the edge of the valid region.
384         if((min_y <= y0) && (y0 < max_y) && (min_x <= x0) && (x0 < max_x))
385         {
386             switch(interpolation)
387             {
388                 case InterpolationPolicy::NEAREST_NEIGHBOR:
389                     *out.ptr() = nearest_interpolation(in.ptr(), x0, y0, stride);
390                     break;
391                 case InterpolationPolicy::BILINEAR:
392                     *out.ptr() = scale_helpers::pixel_bilinear_c1(in.ptr(), stride, x0, y0);
393                     break;
394                 default:
395                     ARM_COMPUTE_ERROR("Interpolation not supported");
396             }
397         }
398         else
399         {
400             // Clamp coordinates
401             const auto xi = utility::clamp<int>(std::floor(x0), min_x, max_x - 1);
402             const auto yi = utility::clamp<int>(std::floor(y0), min_y, max_y - 1);
403             switch(interpolation)
404             {
405                 case InterpolationPolicy::NEAREST_NEIGHBOR:
406                     *out.ptr() = *(in.ptr() + xi + yi * stride);
407                     break;
408                 case InterpolationPolicy::BILINEAR:
409                 {
410                     const auto xi_1 = utility::clamp<int>(std::floor(x0 + 1), min_x, max_x - 1);
411                     const auto yi_1 = utility::clamp<int>(std::floor(y0 + 1), min_y, max_y - 1);
412 
413                     const float dx  = x0 - std::floor(x0);
414                     const float dy  = y0 - std::floor(y0);
415                     const float dx1 = 1.0f - dx;
416                     const float dy1 = 1.0f - dy;
417 
418                     const float a00 = *(in.ptr() + xi + yi * stride);
419                     const float a01 = *(in.ptr() + xi_1 + yi * stride);
420                     const float a10 = *(in.ptr() + xi + yi_1 * stride);
421                     const float a11 = *(in.ptr() + xi_1 + yi_1 * stride);
422 
423                     *out.ptr() = a00 * (dx1 * dy1) + a01 * (dx * dy1) + a10 * (dx1 * dy) + a11 * (dx * dy);
424                 }
425                 break;
426                 default:
427                     ARM_COMPUTE_ERROR("Interpolation not supported");
428             }
429         }
430 
431         x0 += M00;
432         y0 += M10;
433     },
434     in, out);
435 }
436 
437 template <InterpolationPolicy interpolation>
warp_undefined(const Window & window)438 void NEWarpPerspectiveKernel<interpolation>::warp_undefined(const Window &window)
439 {
440     // Don't increment in X and Y direction for the input tensor
441     // A pointer to the start of this plane is needed as base for the precomputed offsets
442     Window win_in(window);
443     win_in.set(Window::DimX, Window::Dimension(0, 0, 0));
444     win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
445 
446     Iterator in(_input, win_in);
447     Iterator out(_output, window);
448 
449     const int    min_x  = _input->info()->valid_region().anchor[0];
450     const int    max_x  = min_x + _input->info()->valid_region().shape[0];
451     const int    min_y  = _input->info()->valid_region().anchor[1];
452     const int    max_y  = min_y + _input->info()->valid_region().shape[1];
453     const size_t stride = _input->info()->strides_in_bytes()[1];
454 
455     // x0 = M00 * x + M01 * y + M02
456     // y0 = M10 * x + M11 * y + M12
457     // z0 = M20 * x + M21 * y + M22
458     // xn = x0 / z0
459     // yn = y0 / z0
460     const float M00 = _matrix[0];
461     const float M10 = _matrix[1];
462     const float M20 = _matrix[2];
463     const float M01 = _matrix[0 + 1 * 3];
464     const float M11 = _matrix[1 + 1 * 3];
465     const float M21 = _matrix[2 + 1 * 3];
466     const float M02 = _matrix[0 + 2 * 3];
467     const float M12 = _matrix[1 + 2 * 3];
468     const float M22 = _matrix[2 + 2 * 3];
469 
470     // "M00 * x", "M10 * x" and "M20 * x", when x = window.x.start
471     const float start_x0 = M00 * window.x().start();
472     const float start_y0 = M10 * window.x().start();
473     const float start_z0 = M20 * window.x().start();
474 
475     // Current row
476     int y_cur  = window.y().start();
477     int z_cur  = window.z().start();
478     int d3_cur = window[3].start();
479     int d4_cur = window[4].start();
480     int d5_cur = window[5].start();
481 
482     // const_x0, const_y0 and const_z0 are the constant parts of x0, y0 and z0 during the row processing
483     float const_x0 = M01 * y_cur + M02;
484     float const_y0 = M11 * y_cur + M12;
485     float const_z0 = M21 * y_cur + M22;
486 
487     // Perspective warp coordinates
488     float x0 = start_x0 + const_x0;
489     float y0 = start_y0 + const_y0;
490     float z0 = start_z0 + const_z0;
491 
492     execute_window_loop(window, [&](const Coordinates & id)
493     {
494         // Check if we are processing a new row. If so, update the current processed row (y_cur), x0, y0 and z0
495         if((y_cur != id.y()) || (z_cur != id.z()) || (d3_cur != id[3]) || (d4_cur != id[4]) || (d5_cur != id[5]))
496         {
497             y_cur  = id.y();
498             z_cur  = id.z();
499             d3_cur = id[3];
500             d4_cur = id[4];
501             d5_cur = id[5];
502 
503             const_x0 = M01 * y_cur + M02;
504             const_y0 = M11 * y_cur + M12;
505             const_z0 = M21 * y_cur + M22;
506 
507             x0 = start_x0 + const_x0;
508             y0 = start_y0 + const_y0;
509             z0 = start_z0 + const_z0;
510         }
511 
512         const float xn = x0 / z0;
513         const float yn = y0 / z0;
514 
515         // Only write to output if xn and yn are within the valid region.
516         // Otherwise the read value would be undefined.
517         if((min_y <= yn) && (yn < max_y) && (min_x <= xn) && (xn < max_x))
518         {
519             switch(interpolation)
520             {
521                 case InterpolationPolicy::NEAREST_NEIGHBOR:
522                     *out.ptr() = nearest_interpolation(in.ptr(), xn, yn, stride);
523                     break;
524                 case InterpolationPolicy::BILINEAR:
525                     *out.ptr() = scale_helpers::pixel_bilinear_c1(in.ptr(), stride, xn, yn);
526                     break;
527                 default:
528                     ARM_COMPUTE_ERROR("Interpolation not supported");
529             }
530         }
531 
532         x0 += M00;
533         y0 += M10;
534         z0 += M20;
535     },
536     in, out);
537 }
538 
539 template <InterpolationPolicy interpolation>
warp_constant(const Window & window)540 void NEWarpPerspectiveKernel<interpolation>::warp_constant(const Window &window)
541 {
542     // Don't increment in X and Y direction for the input tensor
543     // A pointer to the start of this plane is needed as base for the precomputed offsets
544     Window win_in(window);
545     win_in.set(Window::DimX, Window::Dimension(0, 0, 0));
546     win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
547 
548     Iterator in(_input, win_in);
549     Iterator out(_output, window);
550 
551     const int    min_x  = _input->info()->valid_region().anchor[0];
552     const int    max_x  = min_x + _input->info()->valid_region().shape[0];
553     const int    min_y  = _input->info()->valid_region().anchor[1];
554     const int    max_y  = min_y + _input->info()->valid_region().shape[1];
555     const size_t stride = _input->info()->strides_in_bytes()[1];
556 
557     // x0 = M00 * x + M01 * y + M02
558     // y0 = M10 * x + M11 * y + M12
559     // z0 = M20 * x + M21 * y + M22
560     // xn = x0 / z0
561     // yn = y0 / z0
562     const float M00 = _matrix[0];
563     const float M10 = _matrix[1];
564     const float M20 = _matrix[2];
565     const float M01 = _matrix[0 + 1 * 3];
566     const float M11 = _matrix[1 + 1 * 3];
567     const float M21 = _matrix[2 + 1 * 3];
568     const float M02 = _matrix[0 + 2 * 3];
569     const float M12 = _matrix[1 + 2 * 3];
570     const float M22 = _matrix[2 + 2 * 3];
571 
572     // "M00 * x", "M10 * x" and "M20 * x", when x = window.x.start
573     const float start_x0 = M00 * window.x().start();
574     const float start_y0 = M10 * window.x().start();
575     const float start_z0 = M20 * window.x().start();
576 
577     // Current row
578     int y_cur  = window.y().start();
579     int z_cur  = window.z().start();
580     int d3_cur = window[3].start();
581     int d4_cur = window[4].start();
582     int d5_cur = window[5].start();
583 
584     // const_x0, const_y0 and const_z0 are the constant parts of x0, y0 and z0 during the row processing
585     float const_x0 = M01 * y_cur + M02;
586     float const_y0 = M11 * y_cur + M12;
587     float const_z0 = M21 * y_cur + M22;
588 
589     // Perspective warp coordinates
590     float x0 = start_x0 + const_x0;
591     float y0 = start_y0 + const_y0;
592     float z0 = start_z0 + const_z0;
593 
594     execute_window_loop(window, [&](const Coordinates & id)
595     {
596         // Check if we are processing a new row. If so, update the current processed row (y_cur), x0, y0 and z0
597         if((y_cur != id.y()) || (z_cur != id.z()) || (d3_cur != id[3]) || (d4_cur != id[4]) || (d5_cur != id[5]))
598         {
599             y_cur  = id.y();
600             z_cur  = id.z();
601             d3_cur = id[3];
602             d4_cur = id[4];
603             d5_cur = id[5];
604 
605             const_x0 = M01 * y_cur + M02;
606             const_y0 = M11 * y_cur + M12;
607             const_z0 = M21 * y_cur + M22;
608 
609             x0 = start_x0 + const_x0;
610             y0 = start_y0 + const_y0;
611             z0 = start_z0 + const_z0;
612         }
613 
614         const float xn = x0 / z0;
615         const float yn = y0 / z0;
616 
617         // Only use input values if xn and yn are within the valid region.
618         if((min_y <= yn) && (yn < max_y) && (min_x <= xn) && (xn < max_x))
619         {
620             switch(interpolation)
621             {
622                 case InterpolationPolicy::NEAREST_NEIGHBOR:
623                     *out.ptr() = nearest_interpolation(in.ptr(), xn, yn, stride);
624                     break;
625                 case InterpolationPolicy::BILINEAR:
626                     *out.ptr() = scale_helpers::pixel_bilinear_c1(in.ptr(), stride, xn, yn);
627                     break;
628                 default:
629                     ARM_COMPUTE_ERROR("Interpolation not supported");
630             }
631         }
632         else
633         {
634             switch(interpolation)
635             {
636                 case InterpolationPolicy::NEAREST_NEIGHBOR:
637                     *out.ptr() = _constant_border_value;
638                     break;
639                 case InterpolationPolicy::BILINEAR:
640                 {
641                     const auto xi   = utility::clamp<int>(std::floor(xn), min_x - 1, max_x);
642                     const auto yi   = utility::clamp<int>(std::floor(yn), min_y - 1, max_y);
643                     const auto xi_1 = utility::clamp<int>(std::floor(xn + 1), min_x - 1, max_x);
644                     const auto yi_1 = utility::clamp<int>(std::floor(yn + 1), min_y - 1, max_y);
645 
646                     const float dx  = xn - std::floor(xn);
647                     const float dy  = yn - std::floor(yn);
648                     const float dx1 = 1.0f - dx;
649                     const float dy1 = 1.0f - dy;
650 
651                     const float a00 = *(in.ptr() + xi + yi * stride);
652                     const float a01 = *(in.ptr() + xi_1 + yi * stride);
653                     const float a10 = *(in.ptr() + xi + yi_1 * stride);
654                     const float a11 = *(in.ptr() + xi_1 + yi_1 * stride);
655 
656                     *out.ptr() = a00 * (dx1 * dy1) + a01 * (dx * dy1) + a10 * (dx1 * dy) + a11 * (dx * dy);
657                 }
658                 break;
659                 default:
660                     ARM_COMPUTE_ERROR("Interpolation not supported");
661             }
662         }
663 
664         x0 += M00;
665         y0 += M10;
666         z0 += M20;
667     },
668     in, out);
669 }
670 
671 template <InterpolationPolicy interpolation>
warp_replicate(const Window & window)672 void NEWarpPerspectiveKernel<interpolation>::warp_replicate(const Window &window)
673 {
674     // Don't increment in X and Y direction for the input tensor
675     // A pointer to the start of this plane is needed as base for the precomputed offsets
676     Window win_in(window);
677     win_in.set(Window::DimX, Window::Dimension(0, 0, 0));
678     win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
679 
680     Iterator in(_input, win_in);
681     Iterator out(_output, window);
682 
683     const int    min_x  = _input->info()->valid_region().anchor[0];
684     const int    max_x  = min_x + _input->info()->valid_region().shape[0];
685     const int    min_y  = _input->info()->valid_region().anchor[1];
686     const int    max_y  = min_y + _input->info()->valid_region().shape[1];
687     const size_t stride = _input->info()->strides_in_bytes()[1];
688 
689     // Current row
690     int y_cur  = window.y().start();
691     int z_cur  = window.z().start();
692     int d3_cur = window[3].start();
693     int d4_cur = window[4].start();
694     int d5_cur = window[5].start();
695 
696     // x0 = M00 * x + M01 * y + M02
697     // y0 = M10 * x + M11 * y + M12
698     // z0 = M20 * x + M21 * y + M22
699     // xn = x0 / z0
700     // yn = y0 / z0
701     const float M00 = _matrix[0];
702     const float M10 = _matrix[1];
703     const float M20 = _matrix[2];
704     const float M01 = _matrix[0 + 1 * 3];
705     const float M11 = _matrix[1 + 1 * 3];
706     const float M21 = _matrix[2 + 1 * 3];
707     const float M02 = _matrix[0 + 2 * 3];
708     const float M12 = _matrix[1 + 2 * 3];
709     const float M22 = _matrix[2 + 2 * 3];
710 
711     // "M00 * x", "M10 * x" and "M20 * x", when x = window.x.start
712     const float start_x0 = M00 * window.x().start();
713     const float start_y0 = M10 * window.x().start();
714     const float start_z0 = M20 * window.x().start();
715 
716     // const_x0, const_y0 and const_z0 are the constant parts of x0, y0 and z0 during the row processing
717     float const_x0 = M01 * y_cur + M02;
718     float const_y0 = M11 * y_cur + M12;
719     float const_z0 = M21 * y_cur + M22;
720 
721     // Perspective warp coordinates
722     float x0 = start_x0 + const_x0;
723     float y0 = start_y0 + const_y0;
724     float z0 = start_z0 + const_z0;
725 
726     execute_window_loop(window, [&](const Coordinates & id)
727     {
728         // Check if we are processing a new row. If so, update the current processed row (y_cur), x0, y0 and z0
729         if((y_cur != id.y()) || (z_cur != id.z()) || (d3_cur != id[3]) || (d4_cur != id[4]) || (d5_cur != id[5]))
730         {
731             y_cur  = id.y();
732             z_cur  = id.z();
733             d3_cur = id[3];
734             d4_cur = id[4];
735             d5_cur = id[5];
736 
737             const_x0 = M01 * y_cur + M02;
738             const_y0 = M11 * y_cur + M12;
739             const_z0 = M21 * y_cur + M22;
740 
741             x0 = start_x0 + const_x0;
742             y0 = start_y0 + const_y0;
743             z0 = start_z0 + const_z0;
744         }
745 
746         const float xn = x0 / z0;
747         const float yn = y0 / z0;
748 
749         // Only load from (x0, y0) if the point is within the valid region.
750         if((min_y <= yn) && (yn < max_y) && (min_x <= xn) && (xn < max_x))
751         {
752             switch(interpolation)
753             {
754                 case InterpolationPolicy::NEAREST_NEIGHBOR:
755                     *out.ptr() = nearest_interpolation(in.ptr(), xn, yn, stride);
756                     break;
757                 case InterpolationPolicy::BILINEAR:
758                     *out.ptr() = scale_helpers::pixel_bilinear_c1(in.ptr(), stride, xn, yn);
759                     break;
760                 default:
761                     ARM_COMPUTE_ERROR("Interpolation not supported");
762             }
763         }
764         else
765         {
766             // Clamp coordinates
767             const auto xi = utility::clamp<int>(std::floor(xn), min_x, max_x - 1);
768             const auto yi = utility::clamp<int>(std::floor(yn), min_y, max_y - 1);
769             switch(interpolation)
770             {
771                 case InterpolationPolicy::NEAREST_NEIGHBOR:
772                     *out.ptr() = *(in.ptr() + xi + yi * stride);
773                     break;
774                 case InterpolationPolicy::BILINEAR:
775                 {
776                     const auto xi_1 = utility::clamp<int>(std::floor(xn + 1), min_x, max_x - 1);
777                     const auto yi_1 = utility::clamp<int>(std::floor(yn + 1), min_y, max_y - 1);
778 
779                     const float dx  = xn - std::floor(xn);
780                     const float dy  = yn - std::floor(yn);
781                     const float dx1 = 1.0f - dx;
782                     const float dy1 = 1.0f - dy;
783 
784                     const float a00 = *(in.ptr() + xi + yi * stride);
785                     const float a01 = *(in.ptr() + xi_1 + yi * stride);
786                     const float a10 = *(in.ptr() + xi + yi_1 * stride);
787                     const float a11 = *(in.ptr() + xi_1 + yi_1 * stride);
788 
789                     *out.ptr() = a00 * (dx1 * dy1) + a01 * (dx * dy1) + a10 * (dx1 * dy) + a11 * (dx * dy);
790                 }
791                 break;
792                 default:
793                     ARM_COMPUTE_ERROR("Interpolation not supported");
794             }
795         }
796 
797         x0 += M00;
798         y0 += M10;
799         z0 += M20;
800     },
801     in, out);
802 }
803 
804 template class arm_compute::NEWarpAffineKernel<InterpolationPolicy::NEAREST_NEIGHBOR>;
805 template class arm_compute::NEWarpAffineKernel<InterpolationPolicy::BILINEAR>;
806 template class arm_compute::NEWarpPerspectiveKernel<InterpolationPolicy::NEAREST_NEIGHBOR>;
807 template class arm_compute::NEWarpPerspectiveKernel<InterpolationPolicy::BILINEAR>;
808