1 /*
2 * Copyright (c) 2018-2020 Arm Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24 #include "src/core/NEON/kernels/NEElementwiseUnaryKernel.h"
25
26 #include "arm_compute/core/Error.h"
27 #include "arm_compute/core/Helpers.h"
28 #include "arm_compute/core/ITensor.h"
29 #include "arm_compute/core/Validate.h"
30 #include "src/core/CPP/Validate.h"
31 #include "src/core/NEON/wrapper/wrapper.h"
32 #include "src/core/helpers/AutoConfiguration.h"
33 #include "src/core/helpers/WindowHelpers.h"
34 #include "support/ToolchainSupport.h"
35
36 namespace arm_compute
37 {
38 namespace
39 {
40 template <typename ScalarType>
elementwise_op_scalar_imp(ElementWiseUnary op,const ScalarType & a)41 inline ScalarType elementwise_op_scalar_imp(ElementWiseUnary op, const ScalarType &a)
42 {
43 switch(op)
44 {
45 case ElementWiseUnary::RSQRT:
46 return 1 / sqrt(a);
47 case ElementWiseUnary::EXP:
48 return std::exp(a);
49 case ElementWiseUnary::NEG:
50 return -a;
51 case ElementWiseUnary::LOG:
52 return std::log(a);
53 case ElementWiseUnary::ABS:
54 return std::abs(a);
55 case ElementWiseUnary::ROUND:
56 return support::cpp11::nearbyint(a);
57 case ElementWiseUnary::SIN:
58 return std::sin(a);
59 default:
60 ARM_COMPUTE_ERROR("NOT_SUPPORTED!");
61 }
62 }
63
64 template <typename ScalarType, typename VectorType>
elementwise_op_imp(ElementWiseUnary op,const VectorType & a)65 inline VectorType elementwise_op_imp(ElementWiseUnary op, const VectorType &a)
66 {
67 switch(op)
68 {
69 case ElementWiseUnary::RSQRT:
70 return wrapper::vinvsqrt(a);
71 case ElementWiseUnary::EXP:
72 return wrapper::vexpq(a);
73 case ElementWiseUnary::NEG:
74 return wrapper::vneg(a);
75 case ElementWiseUnary::LOG:
76 return wrapper::vlog(a);
77 case ElementWiseUnary::ABS:
78 return wrapper::vabs(a);
79 case ElementWiseUnary::ROUND:
80 return wrapper::vround(a);
81 case ElementWiseUnary::SIN:
82 return wrapper::vsin(a);
83 default:
84 ARM_COMPUTE_ERROR("NOT_SUPPORTED!");
85 }
86 }
87 } // namespace
88
89 template <typename ScalarType>
elementwise_op(const Window & window)90 void NEElementwiseUnaryKernel::elementwise_op(const Window &window)
91 {
92 const int window_step_x = 16 / sizeof(ScalarType);
93 const auto window_start_x = static_cast<int>(window.x().start());
94 const auto window_end_x = static_cast<int>(window.x().end());
95
96 Window win = window;
97 win.set(Window::DimX, Window::Dimension(0, 1, 1));
98
99 Iterator input(_input, win);
100 Iterator output(_output, win);
101
102 execute_window_loop(win, [&](const Coordinates &)
103 {
104 auto output_ptr = reinterpret_cast<ScalarType *>(output.ptr());
105 const auto input_ptr = reinterpret_cast<const ScalarType *>(input.ptr());
106
107 int x = window_start_x;
108 for(; x <= window_end_x - window_step_x; x += window_step_x)
109 {
110 wrapper::vstore(output_ptr + x, elementwise_op_imp<ScalarType>(_op, wrapper::vloadq(input_ptr + x)));
111 }
112 for(; x < window_end_x; ++x)
113 {
114 *(output_ptr + x) = elementwise_op_scalar_imp(_op, *(input_ptr + x));
115 }
116 },
117 input, output);
118 }
119
NEElementwiseUnaryKernel()120 NEElementwiseUnaryKernel::NEElementwiseUnaryKernel()
121 : _func(nullptr), _input(nullptr), _output(nullptr), _op()
122 {
123 }
124
configure(ElementWiseUnary op,const ITensor * input,ITensor * output)125 void NEElementwiseUnaryKernel::configure(ElementWiseUnary op, const ITensor *input, ITensor *output)
126 {
127 ARM_COMPUTE_ERROR_THROW_ON(validate(op, input->info(), output->info()));
128 ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
129
130 // Configure kernel window
131 const std::pair<TensorShape, ValidRegion> broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(*input->info());
132 const TensorShape &out_shape = broadcast_pair.first;
133 const ValidRegion &valid_region = broadcast_pair.second;
134
135 // Auto initialize output if not initialized
136 auto_init_if_empty(*output->info(), out_shape, 1, input->info()->data_type());
137
138 Window win = calculate_max_window(valid_region);
139
140 _input = input;
141 _output = output;
142 _op = op;
143
144 INEKernel::configure(win);
145
146 switch(input->info()->data_type())
147 {
148 case DataType::F32:
149 _func = &NEElementwiseUnaryKernel::elementwise_op<float>;
150 break;
151 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
152 case DataType::F16:
153 _func = &NEElementwiseUnaryKernel::elementwise_op<float16_t>;
154 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
155 break;
156 case DataType::S32:
157 _func = &NEElementwiseUnaryKernel::elementwise_op<int32_t>;
158 break;
159 default:
160 ARM_COMPUTE_ERROR("DataType not supported");
161 }
162 }
163
validate(ElementWiseUnary op,const ITensorInfo * input,const ITensorInfo * output)164 Status NEElementwiseUnaryKernel::validate(ElementWiseUnary op, const ITensorInfo *input, const ITensorInfo *output)
165 {
166 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
167 ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
168 switch(op)
169 {
170 case ElementWiseUnary::EXP:
171 case ElementWiseUnary::RSQRT:
172 case ElementWiseUnary::LOG:
173 case ElementWiseUnary::ROUND:
174 case ElementWiseUnary::SIN:
175 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
176 break;
177 case ElementWiseUnary::NEG:
178 case ElementWiseUnary::ABS:
179 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32, DataType::S32);
180 break;
181 default:
182 ARM_COMPUTE_ERROR("ElementWiseUnary operation not supported");
183 }
184 // Validate in case of configured output
185 if(output->total_size() > 0)
186 {
187 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
188 }
189
190 return Status{};
191 }
192
run(const Window & window,const ThreadInfo & info)193 void NEElementwiseUnaryKernel::run(const Window &window, const ThreadInfo &info)
194 {
195 ARM_COMPUTE_UNUSED(info);
196 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
197 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
198 ARM_COMPUTE_ERROR_ON(_func == nullptr);
199 (this->*_func)(window);
200 }
201 } // namespace arm_compute
202