• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2016-2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "src/core/NEON/kernels/NESobel3x3Kernel.h"
25 
26 #include "arm_compute/core/Coordinates.h"
27 #include "arm_compute/core/Error.h"
28 #include "arm_compute/core/Helpers.h"
29 #include "arm_compute/core/ITensor.h"
30 #include "arm_compute/core/Types.h"
31 #include "arm_compute/core/Validate.h"
32 #include "src/core/helpers/AutoConfiguration.h"
33 #include "src/core/helpers/WindowHelpers.h"
34 
35 #include <arm_neon.h>
36 #include <cstdint>
37 
38 using namespace arm_compute;
39 
NESobel3x3Kernel()40 NESobel3x3Kernel::NESobel3x3Kernel()
41     : _run_sobel_x(false), _run_sobel_y(false), _input(nullptr), _output_x(nullptr), _output_y(nullptr)
42 {
43 }
44 
border_size() const45 BorderSize NESobel3x3Kernel::border_size() const
46 {
47     return BorderSize{ 1 };
48 }
49 
configure(const ITensor * input,ITensor * output_x,ITensor * output_y,bool border_undefined)50 void NESobel3x3Kernel::configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined)
51 {
52     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
53     ARM_COMPUTE_ERROR_ON((output_x == nullptr) && (output_y == nullptr));
54 
55     _run_sobel_x = output_x != nullptr;
56     _run_sobel_y = output_y != nullptr;
57 
58     if(_run_sobel_x)
59     {
60         ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_x, 1, DataType::S16);
61     }
62 
63     if(_run_sobel_y)
64     {
65         ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_y, 1, DataType::S16);
66     }
67 
68     _input    = input;
69     _output_x = output_x;
70     _output_y = output_y;
71 
72     // Configure kernel window
73     constexpr unsigned int num_elems_processed_per_iteration = 8;
74     constexpr unsigned int num_elems_read_per_iteration      = 16;
75     constexpr unsigned int num_elems_written_per_iteration   = 8;
76     constexpr unsigned int num_rows_read_per_iteration       = 3;
77 
78     Window                 win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size());
79     AccessWindowHorizontal output_x_access(output_x == nullptr ? nullptr : output_x->info(), 0, num_elems_written_per_iteration);
80     AccessWindowHorizontal output_y_access(output_y == nullptr ? nullptr : output_y->info(), 0, num_elems_written_per_iteration);
81 
82     update_window_and_padding(win,
83                               AccessWindowRectangle(input->info(), -border_size().left, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration),
84                               output_x_access,
85                               output_y_access);
86 
87     output_x_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
88     output_y_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size());
89 
90     INEKernel::configure(win);
91 }
92 
run(const Window & window,const ThreadInfo & info)93 void NESobel3x3Kernel::run(const Window &window, const ThreadInfo &info)
94 {
95     ARM_COMPUTE_UNUSED(info);
96     ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
97     ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
98 
99     const unsigned char *const input_top_ptr = _input->ptr_to_element(Coordinates(-1, -1));
100     const unsigned char *const input_mid_ptr = _input->ptr_to_element(Coordinates(-1, 0));
101     const unsigned char *const input_bot_ptr = _input->ptr_to_element(Coordinates(-1, 1));
102 
103     Iterator input(_input, window);
104     Iterator output_y;
105     Iterator output_x;
106 
107     if(_run_sobel_y)
108     {
109         output_y = Iterator(_output_y, window);
110     }
111 
112     if(_run_sobel_x)
113     {
114         output_x = Iterator(_output_x, window);
115     }
116 
117     static const int16x8_t two      = vdupq_n_s16(2);
118     static const int16x8_t minustwo = vdupq_n_s16(-2);
119 
120     if(_run_sobel_y && _run_sobel_x)
121     {
122         execute_window_loop(window, [&](const Coordinates &)
123         {
124             const uint8x16_t top_data = vld1q_u8(input_top_ptr + input.offset());
125             const uint8x16_t mid_data = vld1q_u8(input_mid_ptr + input.offset());
126             const uint8x16_t bot_data = vld1q_u8(input_bot_ptr + input.offset());
127 
128             const int16x8x2_t top_s16 =
129             {
130                 {
131                     vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(top_data))),
132                     vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(top_data)))
133                 }
134             };
135             const int16x8x2_t mid_s16 =
136             {
137                 {
138                     vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(mid_data))),
139                     vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(mid_data)))
140                 }
141             };
142             const int16x8x2_t bot_s16 =
143             {
144                 {
145                     vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(bot_data))),
146                     vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(bot_data)))
147                 }
148             };
149 
150             //SOBEL Y
151             //top left
152             int16x8_t out_y = vnegq_s16(top_s16.val[0]);
153             //top mid
154             out_y = vmlaq_s16(out_y, vextq_s16(top_s16.val[0], top_s16.val[1], 1), minustwo);
155             //top right
156             out_y = vsubq_s16(out_y, vextq_s16(top_s16.val[0], top_s16.val[1], 2));
157             //bot left
158             out_y = vaddq_s16(out_y, bot_s16.val[0]);
159             //bot mid
160             out_y = vmlaq_s16(out_y, vextq_s16(bot_s16.val[0], bot_s16.val[1], 1), two);
161             //bot right
162             out_y = vaddq_s16(out_y, vextq_s16(bot_s16.val[0], bot_s16.val[1], 2));
163 
164             vst1q_s16(reinterpret_cast<int16_t *>(output_y.ptr()), out_y);
165 
166             //SOBEL X
167             //top left
168             int16x8_t out_x = vnegq_s16(top_s16.val[0]);
169             //top right
170             out_x = vaddq_s16(out_x, vextq_s16(top_s16.val[0], top_s16.val[1], 2));
171             //mid left
172             out_x = vmlaq_s16(out_x, mid_s16.val[0], minustwo);
173             //mid right
174             out_x = vmlaq_s16(out_x, vextq_s16(mid_s16.val[0], mid_s16.val[1], 2), two);
175             //bot left
176             out_x = vsubq_s16(out_x, bot_s16.val[0]);
177             //bot right
178             out_x = vaddq_s16(out_x, vextq_s16(bot_s16.val[0], bot_s16.val[1], 2));
179 
180             vst1q_s16(reinterpret_cast<int16_t *>(output_x.ptr()), out_x);
181         },
182         input, output_x, output_y);
183     }
184     else if(_run_sobel_x)
185     {
186         execute_window_loop(window, [&](const Coordinates &)
187         {
188             const uint8x16_t top_data = vld1q_u8(input_top_ptr + input.offset());
189             const uint8x16_t mid_data = vld1q_u8(input_mid_ptr + input.offset());
190             const uint8x16_t bot_data = vld1q_u8(input_bot_ptr + input.offset());
191 
192             const int16x8x2_t top_s16 =
193             {
194                 {
195                     vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(top_data))),
196                     vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(top_data)))
197                 }
198             };
199             const int16x8x2_t mid_s16 =
200             {
201                 {
202                     vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(mid_data))),
203                     vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(mid_data)))
204                 }
205             };
206             const int16x8x2_t bot_s16 =
207             {
208                 {
209                     vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(bot_data))),
210                     vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(bot_data)))
211                 }
212             };
213 
214             //SOBEL X
215             //top left
216             int16x8_t out = vnegq_s16(top_s16.val[0]);
217             //top right
218             out = vaddq_s16(out, vextq_s16(top_s16.val[0], top_s16.val[1], 2));
219             //mid left
220             out = vmlaq_s16(out, mid_s16.val[0], minustwo);
221             //mid right
222             out = vmlaq_s16(out, vextq_s16(mid_s16.val[0], mid_s16.val[1], 2), two);
223             //bot left
224             out = vsubq_s16(out, bot_s16.val[0]);
225             //bot right
226             out = vaddq_s16(out, vextq_s16(bot_s16.val[0], bot_s16.val[1], 2));
227 
228             vst1q_s16(reinterpret_cast<int16_t *>(output_x.ptr()), out);
229         },
230         input, output_x);
231     }
232     else if(_run_sobel_y)
233     {
234         execute_window_loop(window, [&](const Coordinates &)
235         {
236             const uint8x16_t top_data = vld1q_u8(input_top_ptr + input.offset());
237             const uint8x16_t bot_data = vld1q_u8(input_bot_ptr + input.offset());
238 
239             const int16x8x2_t top_s16 =
240             {
241                 {
242                     vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(top_data))),
243                     vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(top_data)))
244                 }
245             };
246             const int16x8x2_t bot_s16 =
247             {
248                 {
249                     vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(bot_data))),
250                     vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(bot_data)))
251                 }
252             };
253 
254             //SOBEL Y
255             //top left
256             int16x8_t out = vnegq_s16(top_s16.val[0]);
257             //top mid
258             out = vmlaq_s16(out, vextq_s16(top_s16.val[0], top_s16.val[1], 1), minustwo);
259             //top right
260             out = vsubq_s16(out, vextq_s16(top_s16.val[0], top_s16.val[1], 2));
261             //bot left
262             out = vaddq_s16(out, bot_s16.val[0]);
263             //bot mid
264             out = vmlaq_s16(out, vextq_s16(bot_s16.val[0], bot_s16.val[1], 1), two);
265             //bot right
266             out = vaddq_s16(out, vextq_s16(bot_s16.val[0], bot_s16.val[1], 2));
267 
268             vst1q_s16(reinterpret_cast<int16_t *>(output_y.ptr()), out);
269         },
270         input, output_y);
271     }
272 }
273