1 /*
2 * Copyright (c) 2016-2020 Arm Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
#include "src/core/NEON/kernels/NEFillBorderKernel.h"

#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
#include "src/core/NEON/kernels/NEFillBorderKernel.h"
#include "src/core/helpers/WindowHelpers.h"

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstring>
38
39 namespace arm_compute
40 {
41 class Coordinates;
42 namespace
43 {
fill_constant_value_single_channel_special(ITensor * tensor,const Window & window,unsigned int right,unsigned int bottom,const PixelValue & constant_border_value)44 inline void fill_constant_value_single_channel_special(ITensor *tensor, const Window &window, unsigned int right, unsigned int bottom, const PixelValue &constant_border_value)
45 {
46 float border_value;
47 constant_border_value.get(border_value);
48 uint8_t *const start_valid_region = tensor->ptr_to_element(tensor->info()->valid_region().anchor);
49 const size_t width = tensor->info()->valid_region().shape[0];
50 const size_t height = tensor->info()->valid_region().shape[1];
51 const int stridey = tensor->info()->strides_in_bytes()[1];
52
53 // Left and right border
54 Window vertical(window);
55 vertical.set(Window::DimY, Window::Dimension(0, height, 1));
56
57 Iterator vertical_it(tensor, vertical);
58
59 execute_window_loop(vertical, [&](const Coordinates &)
60 {
61 const auto row_start = reinterpret_cast<float *>(start_valid_region + vertical_it.offset());
62
63 // Fill left and right borders
64 *(row_start - 1) = border_value;
65 std::fill_n(row_start + width, right, border_value);
66 },
67 vertical_it);
68
69 // Top and bottom border
70 Iterator plane_it(tensor, window);
71
72 // Iterate over all XY planes
73 execute_window_loop(window, [&](const Coordinates &)
74 {
75 uint8_t *base_addr = start_valid_region + plane_it.offset();
76 // Top border
77 const auto row_start = reinterpret_cast<float *>(base_addr - stridey);
78 // Fill top rows including left/right borders
79 std::fill_n(row_start - 1, 1 + width + right, border_value);
80
81 // Bottom border
82 const unsigned low_border_size = height + bottom;
83 for(unsigned int i = height; i < low_border_size; ++i)
84 {
85 const auto row_start = reinterpret_cast<float *>(base_addr + i * stridey);
86
87 // Fill bottom rows including left/right borders
88 std::fill_n(row_start - 1, 1 + width + right, border_value);
89 }
90 },
91 plane_it);
92 }
93 } // namespace
94
NEFillBorderKernel()95 NEFillBorderKernel::NEFillBorderKernel()
96 : _tensor(nullptr), _border_size(0), _mode(BorderMode::UNDEFINED), _constant_border_value(static_cast<float>(0.f))
97 {
98 }
99
configure(ITensor * tensor,BorderSize border_size,BorderMode border_mode,const PixelValue & constant_border_value)100 void NEFillBorderKernel::configure(ITensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value)
101 {
102 ARM_COMPUTE_ERROR_ON_NULLPTR(tensor);
103 //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions.
104 ARM_COMPUTE_ERROR_ON(tensor->info()->data_type() == DataType::UNKNOWN);
105
106 _tensor = tensor;
107 _border_size = border_size;
108 _mode = border_mode;
109 _constant_border_value = constant_border_value;
110
111 _border_size.limit(tensor->info()->padding());
112
113 Window win;
114 win.set(Window::DimX, Window::Dimension(0, 1, 1));
115 win.set(Window::DimY, Window::Dimension(0, 1, 1));
116 win.use_tensor_dimensions(_tensor->info()->tensor_shape(), Window::DimZ);
117 INEKernel::configure(win);
118 }
119
run(const Window & window,const ThreadInfo & info)120 void NEFillBorderKernel::run(const Window &window, const ThreadInfo &info)
121 {
122 ARM_COMPUTE_UNUSED(info);
123
124 // If there is no border: early exit
125 if(_border_size.empty())
126 {
127 return;
128 }
129
130 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
131 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
132
133 switch(_mode)
134 {
135 case BorderMode::CONSTANT:
136 {
137 if(_border_size.left == 1 && _border_size.top == 1 && _tensor->info()->data_type() == DataType::F32)
138 {
139 fill_constant_value_single_channel_special(_tensor, window, _border_size.right, _border_size.bottom, _constant_border_value);
140 }
141 else
142 {
143 fill_constant_value_single_channel(window);
144 }
145 break;
146 }
147 case BorderMode::REPLICATE:
148 {
149 fill_replicate_single_channel(window);
150 break;
151 }
152 case BorderMode::UNDEFINED:
153 break; // Nothing to do here
154 default:
155 ARM_COMPUTE_ERROR("Unknown border mode");
156 }
157 }
158
fill_replicate_single_channel(const Window & window)159 void NEFillBorderKernel::fill_replicate_single_channel(const Window &window)
160 {
161 uint8_t *const start_valid_region = _tensor->ptr_to_element(_tensor->info()->valid_region().anchor);
162 const size_t width = _tensor->info()->valid_region().shape[0];
163 const size_t height = _tensor->info()->valid_region().shape[1];
164 const size_t element_size = _tensor->info()->element_size();
165 // Left and right border
166 Window vertical(window);
167 vertical.set(Window::DimY, Window::Dimension(0, height, 1));
168
169 Iterator vertical_it(_tensor, vertical);
170
171 execute_window_loop(vertical, [&](const Coordinates &)
172 {
173 uint8_t *base_addr = start_valid_region + vertical_it.offset();
174 // Fill left and right borders
175 for(unsigned int i = 0; i < _border_size.left; ++i)
176 {
177 std::memcpy(base_addr + static_cast<int>(i - _border_size.left) * element_size, vertical_it.ptr(), element_size);
178 }
179
180 for(unsigned int i = 0; i < _border_size.right; ++i)
181 {
182 std::memcpy(base_addr + (width + i) * element_size, vertical_it.ptr() + (width - 1) * element_size, element_size);
183 }
184 },
185 vertical_it);
186
187 // Top and bottom border
188 Iterator plane_it(_tensor, window);
189
190 // Iterate over all XY planes
191 execute_window_loop(window, [&](const Coordinates &)
192 {
193 uint8_t *base_addr = start_valid_region + plane_it.offset();
194 // Top border
195 for(int i = -_border_size.top; i < 0; ++i)
196 {
197 // Copy top rows including left/right borders
198 std::memcpy(base_addr + i * static_cast<int>(_tensor->info()->strides_in_bytes()[1]) - _border_size.left * element_size,
199 base_addr - _border_size.left * element_size, (_border_size.left + width + _border_size.right) * element_size);
200 }
201
202 // Bottom border
203 for(unsigned int i = height; i < height + _border_size.bottom; ++i)
204 {
205 // Copy bottom rows including left/right borders
206 std::memcpy(base_addr + i * _tensor->info()->strides_in_bytes()[1] - _border_size.left * element_size,
207 base_addr + (height - 1) * _tensor->info()->strides_in_bytes()[1] - _border_size.left * element_size, (_border_size.left + width + _border_size.right) * element_size);
208 }
209 },
210 plane_it);
211 }
212
fill_constant_value_single_channel(const Window & window)213 void NEFillBorderKernel::fill_constant_value_single_channel(const Window &window)
214 {
215 uint8_t *const start_valid_region = _tensor->ptr_to_element(_tensor->info()->valid_region().anchor);
216 const size_t width = _tensor->info()->valid_region().shape[0];
217 const size_t height = _tensor->info()->valid_region().shape[1];
218 const int stridey = _tensor->info()->strides_in_bytes()[1];
219 const size_t element_size = _tensor->info()->element_size();
220
221 // Left and right border
222 Window vertical(window);
223 vertical.set(Window::DimY, Window::Dimension(0, height, 1));
224
225 Iterator vertical_it(_tensor, vertical);
226
227 execute_window_loop(vertical, [&](const Coordinates &)
228 {
229 uint8_t *base_addr = start_valid_region + vertical_it.offset();
230 // Fill left and right borders
231 for(unsigned int i = 0; i < _border_size.left; ++i)
232 {
233 std::memcpy(base_addr + static_cast<int>(i - _border_size.left) * element_size, &_constant_border_value, element_size);
234 }
235
236 for(unsigned int i = 0; i < _border_size.right; ++i)
237 {
238 std::memcpy(base_addr + (width + i) * element_size, &_constant_border_value, element_size);
239 }
240 },
241 vertical_it);
242
243 // Top and bottom border
244 Iterator plane_it(_tensor, window);
245
246 // Iterate over all XY planes
247 execute_window_loop(window, [&](const Coordinates &)
248 {
249 uint8_t *base_addr = start_valid_region + plane_it.offset();
250 // Top border
251 for(int i = -_border_size.top; i < 0; ++i)
252 {
253 // Fill top rows including left/right borders
254 for(unsigned int j = 0; j < (_border_size.left + width + _border_size.right); ++j)
255 {
256 std::memcpy(base_addr + i * stridey + static_cast<int>(j - _border_size.left) * element_size, &_constant_border_value, element_size);
257 }
258 }
259
260 // Bottom border
261 const unsigned low_border_size = height + _border_size.bottom;
262 for(unsigned int i = height; i < low_border_size; ++i)
263 {
264 // Fill bottom rows including left/right borders
265 for(unsigned int j = 0; j < (_border_size.left + width + _border_size.right); ++j)
266 {
267 std::memcpy(base_addr + i * stridey + static_cast<int>(j - _border_size.left) * element_size, &_constant_border_value, element_size);
268 }
269 }
270 },
271 plane_it);
272 }
273 } // namespace arm_compute
274