/*
 * Copyright (c) 2017-2020 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/runtime/CL/functions/CLCannyEdge.h"

#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/functions/CLSobel3x3.h"
#include "arm_compute/runtime/CL/functions/CLSobel5x5.h"
#include "arm_compute/runtime/CL/functions/CLSobel7x7.h"
#include "src/core/CL/kernels/CLCannyEdgeKernel.h"
#include "src/core/CL/kernels/CLFillBorderKernel.h"
#include "src/core/CL/kernels/CLSobel5x5Kernel.h"
#include "src/core/CL/kernels/CLSobel7x7Kernel.h"
#include "support/MemorySupport.h"

using namespace arm_compute;

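// Typical usage (illustrative sketch; the tensors and threshold values below
// are example choices, not prescribed by the library):
//   CLCannyEdge canny;
//   canny.configure(&src, &dst, 100 /* upper_thr */, 50 /* lower_thr */,
//                   3 /* gradient_size */, 1 /* L1 norm_type */, BorderMode::REPLICATE, 0);
//   canny.run();
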
CLCannyEdge::CLCannyEdge(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
    : _memory_group(std::move(memory_manager)),
      _sobel(),
      _gradient(support::cpp14::make_unique<CLGradientKernel>()),
      _border_mag_gradient(support::cpp14::make_unique<CLFillBorderKernel>()),
      _non_max_suppr(support::cpp14::make_unique<CLEdgeNonMaxSuppressionKernel>()),
      _edge_trace(support::cpp14::make_unique<CLEdgeTraceKernel>()),
      _gx(),
      _gy(),
      _mag(),
      _phase(),
      _nonmax(),
      _visited(),
      _recorded(),
      _l1_list_counter(),
      _l1_stack(),
      _output(nullptr)
{
}

CLCannyEdge::~CLCannyEdge() = default;

void CLCannyEdge::configure(ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr, int32_t gradient_size, int32_t norm_type, BorderMode border_mode,
                            uint8_t constant_border_value)
{
    configure(CLKernelLibrary::get().get_compile_context(), input, output, upper_thr, lower_thr, gradient_size, norm_type, border_mode, constant_border_value);
}

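// The Canny pipeline is assembled in stages: a SobelNxN function computes the
// raw gradients (_gx, _gy), CLGradientKernel derives magnitude and phase,
// CLEdgeNonMaxSuppressionKernel thins the magnitude image to local maxima, and
// CLEdgeTraceKernel performs the hysteresis edge tracing between the two
// thresholds.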
void CLCannyEdge::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr, int32_t gradient_size, int32_t norm_type,
                            BorderMode border_mode,
                            uint8_t constant_border_value)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
    ARM_COMPUTE_ERROR_ON((1 != norm_type) && (2 != norm_type));
    ARM_COMPUTE_ERROR_ON((gradient_size != 3) && (gradient_size != 5) && (gradient_size != 7));
    ARM_COMPUTE_ERROR_ON((lower_thr < 0) || (lower_thr >= upper_thr));

    _output = output;

    const unsigned int L1_hysteresis_stack_size = 8;
    const TensorShape  shape                    = input->info()->tensor_shape();

    TensorInfo gradient_info;
    TensorInfo info;

    // Initialize images
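    // 3x3 and 5x5 gradients fit in 16-bit intermediates; the 7x7 Sobel
    // accumulates larger values, so it needs 32-bit storage.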
    if(gradient_size < 7)
    {
        gradient_info.init(shape, 1, arm_compute::DataType::S16);
        info.init(shape, 1, arm_compute::DataType::U16);
    }
    else
    {
        gradient_info.init(shape, 1, arm_compute::DataType::S32);
        info.init(shape, 1, arm_compute::DataType::U32);
    }

    _gx.allocator()->init(gradient_info);
    _gy.allocator()->init(gradient_info);
    _mag.allocator()->init(info);
    _nonmax.allocator()->init(info);

    TensorInfo info_u8(shape, 1, arm_compute::DataType::U8);
    _phase.allocator()->init(info_u8);
    _l1_list_counter.allocator()->init(info_u8);

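    // _visited and _recorded hold per-pixel state for the hysteresis edge
    // trace, so they match the input shape with one 32-bit word per pixel.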
    TensorInfo info_u32(shape, 1, arm_compute::DataType::U32);
    _visited.allocator()->init(info_u32);
    _recorded.allocator()->init(info_u32);

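    // The L1 hysteresis stack reserves L1_hysteresis_stack_size (8) slots per
    // pixel along the x dimension for coordinates pushed during edge tracing.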
    TensorShape shape_l1_stack = input->info()->tensor_shape();
    shape_l1_stack.set(0, input->info()->dimension(0) * L1_hysteresis_stack_size);
    TensorInfo info_s32(shape_l1_stack, 1, arm_compute::DataType::S32);
    _l1_stack.allocator()->init(info_s32);

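    // Intermediate tensors follow the memory manager lifetime pattern:
    // manage() marks the start of a tensor's lifetime before the kernel that
    // writes it is configured, and allocate() is called once the last kernel
    // reading it has been configured, so the memory manager can reuse the
    // backing memory across non-overlapping lifetimes.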
    // Manage intermediate buffers
    _memory_group.manage(&_gx);
    _memory_group.manage(&_gy);

    // Configure/Init sobelNxN
    if(gradient_size == 3)
    {
        auto k = arm_compute::support::cpp14::make_unique<CLSobel3x3>();
        k->configure(compile_context, input, &_gx, &_gy, border_mode, constant_border_value);
        _sobel = std::move(k);
    }
    else if(gradient_size == 5)
    {
        auto k = arm_compute::support::cpp14::make_unique<CLSobel5x5>();
        k->configure(compile_context, input, &_gx, &_gy, border_mode, constant_border_value);
        _sobel = std::move(k);
    }
    else if(gradient_size == 7)
    {
        auto k = arm_compute::support::cpp14::make_unique<CLSobel7x7>();
        k->configure(compile_context, input, &_gx, &_gy, border_mode, constant_border_value);
        _sobel = std::move(k);
    }
    else
    {
        ARM_COMPUTE_ERROR_VAR("Gradient size %d not supported", gradient_size);
    }

    // Manage intermediate buffers
    _memory_group.manage(&_mag);
    _memory_group.manage(&_phase);

    // Configure gradient
    _gradient->configure(compile_context, &_gx, &_gy, &_mag, &_phase, norm_type);

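    // _gx and _gy are only read by the gradient kernel, so their lifetimes
    // can end here and their memory becomes reusable.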
    // Allocate intermediate buffers
    _gx.allocator()->allocate();
    _gy.allocator()->allocate();

    // Manage intermediate buffers
    _memory_group.manage(&_nonmax);

    // Configure non-maxima suppression
    _non_max_suppr->configure(compile_context, &_mag, &_phase, &_nonmax, lower_thr, border_mode == BorderMode::UNDEFINED);

    // Allocate intermediate buffers
    _phase.allocator()->allocate();

    // Fill border around magnitude image as non-maxima suppression will access
    // it. If border mode is undefined filling the border is a nop.
    _border_mag_gradient->configure(compile_context, &_mag, _non_max_suppr->border_size(), border_mode, constant_border_value);

    // Allocate intermediate buffers
    _mag.allocator()->allocate();

    // Manage intermediate buffers
    _memory_group.manage(&_visited);
    _memory_group.manage(&_recorded);
    _memory_group.manage(&_l1_stack);
    _memory_group.manage(&_l1_list_counter);

    // Configure edge tracing
    _edge_trace->configure(compile_context, &_nonmax, output, upper_thr, lower_thr, &_visited, &_recorded, &_l1_stack, &_l1_list_counter);

    // Allocate intermediate buffers
    _visited.allocator()->allocate();
    _recorded.allocator()->allocate();
    _l1_stack.allocator()->allocate();
    _l1_list_counter.allocator()->allocate();
    _nonmax.allocator()->allocate();
}

void CLCannyEdge::run()
{
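    // Acquire memory for all tensors managed by _memory_group for the
    // duration of this call; it is released again when run() returns.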
    MemoryGroupResourceScope scope_mg(_memory_group);

    // Run sobel
    _sobel->run();

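    // Kernels are enqueued with flush = false so they are batched on the
    // command queue; only the final edge-trace enqueue below flushes.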
    // Run phase and magnitude calculation
    CLScheduler::get().enqueue(*_gradient, false);

    // Fill border before non-maxima suppression. Nop for border mode undefined.
    CLScheduler::get().enqueue(*_border_mag_gradient, false);

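    // Zero the non-maxima output first so border pixels that the kernel may
    // leave unwritten (e.g. with an undefined border) hold no stale data.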
    // Run non-maxima suppression
    _nonmax.clear(CLScheduler::get().queue());
    CLScheduler::get().enqueue(*_non_max_suppr, false);

    // Clear temporary structures and run edge trace
    _output->clear(CLScheduler::get().queue());
    _visited.clear(CLScheduler::get().queue());
    _recorded.clear(CLScheduler::get().queue());
    _l1_list_counter.clear(CLScheduler::get().queue());
    _l1_stack.clear(CLScheduler::get().queue());
    CLScheduler::get().enqueue(*_edge_trace, true);
}