• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2016-2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "src/core/NEON/kernels/NEAbsoluteDifferenceKernel.h"
25 
26 #include "arm_compute/core/Error.h"
27 #include "arm_compute/core/Helpers.h"
28 #include "arm_compute/core/IAccessWindow.h"
29 #include "arm_compute/core/ITensor.h"
30 #include "arm_compute/core/TensorInfo.h"
31 #include "arm_compute/core/Types.h"
32 #include "arm_compute/core/Validate.h"
33 #include "src/core/helpers/AutoConfiguration.h"
34 #include "src/core/helpers/WindowHelpers.h"
35 
36 #include <arm_neon.h>
37 
38 namespace arm_compute
39 {
40 namespace
41 {
abs_diff_U8_U8_U8(const ITensor * in1,const ITensor * in2,ITensor * out,const Window & window)42 void abs_diff_U8_U8_U8(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
43 {
44     Iterator input1(in1, window);
45     Iterator input2(in2, window);
46     Iterator output(out, window);
47 
48     execute_window_loop(window, [&](const Coordinates &)
49     {
50         const uint8x16_t input1_val = vld1q_u8(input1.ptr());
51         const uint8x16_t input2_val = vld1q_u8(input2.ptr());
52 
53         vst1q_u8(output.ptr(), vabdq_u8(input1_val, input2_val));
54     },
55     input1, input2, output);
56 }
57 
/** Saturating absolute difference applied to both halves of an int16x8x2_t pair.
 *
 * For each half, the operands are subtracted with saturation (vqsubq_s16) and
 * the saturating absolute value (vqabsq_s16) of that difference is returned.
 *
 * @param[in] v1 First operand pair.
 * @param[in] v2 Second operand pair.
 *
 * @return Pair holding the result for val[0] and val[1] respectively.
 */
inline int16x8x2_t vqabd2q_s16(const int16x8x2_t &v1, const int16x8x2_t &v2)
{
    const int16x8_t first_half  = vqabsq_s16(vqsubq_s16(v1.val[0], v2.val[0]));
    const int16x8_t second_half = vqabsq_s16(vqsubq_s16(v1.val[1], v2.val[1]));

    return { { first_half, second_half } };
}
70 
abs_diff_S16_S16_S16(const ITensor * in1,const ITensor * in2,ITensor * out,const Window & window)71 void abs_diff_S16_S16_S16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
72 {
73     Iterator input1(in1, window);
74     Iterator input2(in2, window);
75     Iterator output(out, window);
76 
77     execute_window_loop(window, [&](const Coordinates &)
78     {
79         int16x8x2_t input1_val = vld2q_s16(reinterpret_cast<const int16_t *>(input1.ptr()));
80         int16x8x2_t input2_val = vld2q_s16(reinterpret_cast<const int16_t *>(input2.ptr()));
81         vst2q_s16(reinterpret_cast<int16_t *>(output.ptr()), vqabd2q_s16(input1_val, input2_val));
82     },
83     input1, input2, output);
84 }
85 
abs_diff_U8_S16_S16(const ITensor * in1,const ITensor * in2,ITensor * out,const Window & window)86 void abs_diff_U8_S16_S16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
87 {
88     Iterator input1(in1, window);
89     Iterator input2(in2, window);
90     Iterator output(out, window);
91 
92     execute_window_loop(window, [&](const Coordinates &)
93     {
94         const uint8x16_t  input1_val = vld1q_u8(input1.ptr());
95         const int16x8x2_t input2_val =
96         {
97             {
98                 vld1q_s16(reinterpret_cast<const int16_t *>(input2.ptr())),
99                 vld1q_s16(reinterpret_cast<const int16_t *>(input2.ptr()) + 8)
100             }
101         };
102 
103         const int16x8x2_t out_val =
104         {
105             {
106                 vqabsq_s16(vqsubq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(input1_val))), input2_val.val[0])),
107                 vqabsq_s16(vqsubq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(input1_val))), input2_val.val[1]))
108             }
109         };
110 
111         vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()), out_val.val[0]);
112         vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()) + 8, out_val.val[1]);
113 
114     },
115     input1, input2, output);
116 }
117 
abs_diff_S16_U8_S16(const ITensor * in1,const ITensor * in2,ITensor * out,const Window & window)118 void abs_diff_S16_U8_S16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
119 {
120     abs_diff_U8_S16_S16(in2, in1, out, window);
121 }
122 } // namespace
123 
NEAbsoluteDifferenceKernel()124 NEAbsoluteDifferenceKernel::NEAbsoluteDifferenceKernel()
125     : _func(nullptr), _input1(nullptr), _input2(nullptr), _output(nullptr)
126 {
127 }
128 
configure(const ITensor * input1,const ITensor * input2,ITensor * output)129 void NEAbsoluteDifferenceKernel::configure(const ITensor *input1, const ITensor *input2, ITensor *output)
130 {
131     ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output);
132 
133     set_shape_if_empty(*output->info(), input1->info()->tensor_shape());
134 
135     if(input1->info()->data_type() == DataType::S16 || input2->info()->data_type() == DataType::S16)
136     {
137         set_format_if_unknown(*output->info(), Format::S16);
138     }
139     else if(input1->info()->data_type() == DataType::U8 || input2->info()->data_type() == DataType::U8)
140     {
141         set_format_if_unknown(*output->info(), Format::U8);
142     }
143 
144     ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input1, input2, output);
145     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::S16);
146     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8, DataType::S16);
147     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16);
148     ARM_COMPUTE_ERROR_ON_MSG(output->info()->data_type() == DataType::U8 && (input1->info()->data_type() != DataType::U8 || input2->info()->data_type() != DataType::U8),
149                              "The output image can only be U8 if both input images are U8");
150 
151     _input1 = input1;
152     _input2 = input2;
153     _output = output;
154 
155     const DataType input1_data_type = input1->info()->data_type();
156     const DataType input2_data_type = input2->info()->data_type();
157 
158     if(input1_data_type == input2_data_type)
159     {
160         if(input1_data_type == DataType::U8)
161         {
162             _func = &abs_diff_U8_U8_U8;
163         }
164         else
165         {
166             _func = &abs_diff_S16_S16_S16;
167         }
168     }
169     else
170     {
171         if(input1_data_type == DataType::U8)
172         {
173             _func = &abs_diff_U8_S16_S16;
174         }
175         else
176         {
177             _func = &abs_diff_S16_U8_S16;
178         }
179     }
180 
181     constexpr unsigned int num_elems_processed_per_iteration = 16;
182 
183     // Configure kernel window
184     Window                 win = calculate_max_window(*input1->info(), Steps(num_elems_processed_per_iteration));
185     AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
186 
187     update_window_and_padding(win,
188                               AccessWindowHorizontal(input1->info(), 0, num_elems_processed_per_iteration),
189                               AccessWindowHorizontal(input2->info(), 0, num_elems_processed_per_iteration),
190                               output_access);
191 
192     ValidRegion valid_region = intersect_valid_regions(input1->info()->valid_region(),
193                                                        input2->info()->valid_region());
194 
195     output_access.set_valid_region(win, valid_region);
196 
197     INEKernel::configure(win);
198 }
199 
run(const Window & window,const ThreadInfo & info)200 void NEAbsoluteDifferenceKernel::run(const Window &window, const ThreadInfo &info)
201 {
202     ARM_COMPUTE_UNUSED(info);
203     ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
204     ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
205     ARM_COMPUTE_ERROR_ON(_func == nullptr);
206 
207     _func(_input1, _input2, _output, window);
208 }
209 } // namespace arm_compute
210