• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2019-2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "src/core/NEON/kernels/NEFFTDigitReverseKernel.h"
25 
26 #include "arm_compute/core/ITensor.h"
27 #include "arm_compute/core/TensorInfo.h"
28 #include "arm_compute/core/Types.h"
29 #include "arm_compute/core/Validate.h"
30 #include "arm_compute/core/Window.h"
31 #include "src/core/helpers/AutoConfiguration.h"
32 #include "src/core/helpers/WindowHelpers.h"
33 
34 #include <set>
35 
36 namespace arm_compute
37 {
38 namespace
39 {
validate_arguments(const ITensorInfo * input,const ITensorInfo * output,const ITensorInfo * idx,const FFTDigitReverseKernelInfo & config)40 Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *idx, const FFTDigitReverseKernelInfo &config)
41 {
42     ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() != DataType::F32);
43     ARM_COMPUTE_RETURN_ERROR_ON(input->num_channels() > 2);
44     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(idx, 1, DataType::U32);
45     ARM_COMPUTE_RETURN_ERROR_ON(std::set<unsigned int>({ 0, 1 }).count(config.axis) == 0);
46     ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape()[config.axis] != idx->tensor_shape().x());
47 
48     // Checks performed when output is configured
49     if((output != nullptr) && (output->total_size() != 0))
50     {
51         ARM_COMPUTE_RETURN_ERROR_ON(output->num_channels() != 2);
52         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
53         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
54     }
55 
56     return Status{};
57 }
58 
validate_and_configure_window(ITensorInfo * input,ITensorInfo * output,ITensorInfo * idx,const FFTDigitReverseKernelInfo & config)59 std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, ITensorInfo *idx, const FFTDigitReverseKernelInfo &config)
60 {
61     ARM_COMPUTE_UNUSED(idx, config);
62 
63     auto_init_if_empty(*output, input->clone()->set_num_channels(2));
64 
65     Window win = calculate_max_window(*input, Steps());
66     input->set_valid_region(ValidRegion(Coordinates(), input->tensor_shape()));
67 
68     return std::make_pair(Status{}, win);
69 }
70 } // namespace
71 
NEFFTDigitReverseKernel()72 NEFFTDigitReverseKernel::NEFFTDigitReverseKernel()
73     : _func(nullptr), _input(nullptr), _output(nullptr), _idx(nullptr)
74 {
75 }
76 
configure(const ITensor * input,ITensor * output,const ITensor * idx,const FFTDigitReverseKernelInfo & config)77 void NEFFTDigitReverseKernel::configure(const ITensor *input, ITensor *output, const ITensor *idx, const FFTDigitReverseKernelInfo &config)
78 {
79     ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, idx);
80     ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), idx->info(), config));
81 
82     _input  = input;
83     _output = output;
84     _idx    = idx;
85 
86     const size_t axis             = config.axis;
87     const bool   is_conj          = config.conjugate;
88     const bool   is_input_complex = (input->info()->num_channels() == 2);
89 
90     // Configure kernel window
91     auto win_config = validate_and_configure_window(input->info(), output->info(), idx->info(), config);
92     ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
93     INEKernel::configure(win_config.second);
94 
95     if(axis == 0)
96     {
97         if(is_input_complex)
98         {
99             if(is_conj)
100             {
101                 _func = &NEFFTDigitReverseKernel::digit_reverse_kernel_axis_0<true, true>;
102             }
103             else
104             {
105                 _func = &NEFFTDigitReverseKernel::digit_reverse_kernel_axis_0<true, false>;
106             }
107         }
108         else
109         {
110             _func = &NEFFTDigitReverseKernel::digit_reverse_kernel_axis_0<false, false>;
111         }
112     }
113     else if(axis == 1)
114     {
115         if(is_input_complex)
116         {
117             if(is_conj)
118             {
119                 _func = &NEFFTDigitReverseKernel::digit_reverse_kernel_axis_1<true, true>;
120             }
121             else
122             {
123                 _func = &NEFFTDigitReverseKernel::digit_reverse_kernel_axis_1<true, false>;
124             }
125         }
126         else
127         {
128             _func = &NEFFTDigitReverseKernel::digit_reverse_kernel_axis_1<false, false>;
129         }
130     }
131     else
132     {
133         ARM_COMPUTE_ERROR("Not supported");
134     }
135 }
136 
validate(const ITensorInfo * input,const ITensorInfo * output,const ITensorInfo * idx,const FFTDigitReverseKernelInfo & config)137 Status NEFFTDigitReverseKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *idx, const FFTDigitReverseKernelInfo &config)
138 {
139     ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, idx, config));
140     ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get(), idx->clone().get(), config).first);
141     return Status{};
142 }
143 
144 template <bool is_input_complex, bool is_conj>
digit_reverse_kernel_axis_0(const Window & window)145 void NEFFTDigitReverseKernel::digit_reverse_kernel_axis_0(const Window &window)
146 {
147     const size_t N = _input->info()->dimension(0);
148 
149     // Copy the look-up buffer to a local array
150     std::vector<unsigned int> buffer_idx(N);
151     std::copy_n(reinterpret_cast<unsigned int *>(_idx->buffer()), N, buffer_idx.data());
152 
153     // Input/output iterators
154     Window slice = window;
155     slice.set(0, Window::DimX);
156     Iterator in(_input, slice);
157     Iterator out(_output, slice);
158 
159     // Row buffers
160     std::vector<float> buffer_row_out(2 * N);
161     std::vector<float> buffer_row_in(2 * N);
162 
163     execute_window_loop(slice, [&](const Coordinates &)
164     {
165         if(is_input_complex)
166         {
167             // Load
168             memcpy(buffer_row_in.data(), reinterpret_cast<float *>(in.ptr()), 2 * N * sizeof(float));
169 
170             // Shuffle
171             for(size_t x = 0; x < 2 * N; x += 2)
172             {
173                 size_t idx            = buffer_idx[x / 2];
174                 buffer_row_out[x]     = buffer_row_in[2 * idx];
175                 buffer_row_out[x + 1] = (is_conj ? -buffer_row_in[2 * idx + 1] : buffer_row_in[2 * idx + 1]);
176             }
177         }
178         else
179         {
180             // Load
181             memcpy(buffer_row_in.data(), reinterpret_cast<float *>(in.ptr()), N * sizeof(float));
182 
183             // Shuffle
184             for(size_t x = 0; x < N; ++x)
185             {
186                 size_t idx            = buffer_idx[x];
187                 buffer_row_out[2 * x] = buffer_row_in[idx];
188             }
189         }
190 
191         // Copy back
192         memcpy(reinterpret_cast<float *>(out.ptr()), buffer_row_out.data(), 2 * N * sizeof(float));
193     },
194     in, out);
195 }
196 
197 template <bool is_input_complex, bool is_conj>
digit_reverse_kernel_axis_1(const Window & window)198 void NEFFTDigitReverseKernel::digit_reverse_kernel_axis_1(const Window &window)
199 {
200     const size_t Nx = _input->info()->dimension(0);
201     const size_t Ny = _input->info()->dimension(1);
202 
203     // Copy the look-up buffer to a local array
204     std::vector<unsigned int> buffer_idx(Ny);
205     std::copy_n(reinterpret_cast<unsigned int *>(_idx->buffer()), Ny, buffer_idx.data());
206 
207     // Output iterator
208     Window slice = window;
209     slice.set(0, Window::DimX);
210     Iterator out(_output, slice);
211 
212     // Row buffer
213     std::vector<float> buffer_row(Nx);
214 
215     // Strides
216     const size_t stride_z = _input->info()->strides_in_bytes()[2];
217     const size_t stride_w = _input->info()->strides_in_bytes()[3];
218 
219     execute_window_loop(slice, [&](const Coordinates & id)
220     {
221         auto        *out_ptr    = reinterpret_cast<float *>(out.ptr());
222         auto        *in_ptr     = reinterpret_cast<float *>(_input->buffer() + id.z() * stride_z + id[3] * stride_w);
223         const size_t y_shuffled = buffer_idx[id.y()];
224 
225         if(is_input_complex)
226         {
227             // Shuffle the entire row into the output
228             memcpy(out_ptr, in_ptr + 2 * Nx * y_shuffled, 2 * Nx * sizeof(float));
229 
230             // Conjugate if necessary
231             if(is_conj)
232             {
233                 for(size_t x = 0; x < 2 * Nx; x += 2)
234                 {
235                     out_ptr[x + 1] = -out_ptr[x + 1];
236                 }
237             }
238         }
239         else
240         {
241             // Shuffle the entire row into the buffer
242             memcpy(buffer_row.data(), in_ptr + Nx * y_shuffled, Nx * sizeof(float));
243 
244             // Copy the buffer to the output, with a zero imaginary part
245             for(size_t x = 0; x < 2 * Nx; x += 2)
246             {
247                 out_ptr[x] = buffer_row[x / 2];
248             }
249         }
250     },
251     out);
252 }
253 
run(const Window & window,const ThreadInfo & info)254 void NEFFTDigitReverseKernel::run(const Window &window, const ThreadInfo &info)
255 {
256     ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
257     ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
258     ARM_COMPUTE_UNUSED(info);
259     (this->*_func)(window);
260 }
261 
262 } // namespace arm_compute
263