// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2015 Jianwei Cui <thucjw@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_FFT_H
#define EIGEN_CXX11_TENSOR_TENSOR_FFT_H

// This code requires the ability to initialize arrays of constant
// values directly inside a class.
#if __cplusplus >= 201103L || EIGEN_COMP_MSVC >= 1900

namespace Eigen {

/** \class TensorFFT
 * \ingroup CXX11_Tensor_Module
 *
 * \brief Tensor FFT class.
 *
 * TODO:
 * Vectorize the Cooley-Tukey and the Bluestein algorithms
 * Add support for multithreaded evaluation
 * Improve the performance on GPU
 */
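
// Usage sketch (added; follows the TensorBase::fft() API exercised by the
// Tensor module's tests, with illustrative tensor names and sizes):
//
//   Eigen::Tensor<float, 2> input(4, 8);
//   input.setRandom();
//   Eigen::array<int, 1> fft_dims;
//   fft_dims[0] = 1;  // transform each length-8 line along dimension 1
//   Eigen::Tensor<std::complex<float>, 2> output =
//       input.fft<Eigen::BothParts, Eigen::FFT_FORWARD>(fft_dims);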

template <bool NeedUpgrade> struct MakeComplex {
  template <typename T>
  EIGEN_DEVICE_FUNC
  T operator() (const T& val) const { return val; }
};

template <> struct MakeComplex<true> {
  template <typename T>
  EIGEN_DEVICE_FUNC
  std::complex<T> operator() (const T& val) const { return std::complex<T>(val, 0); }
};

template <> struct MakeComplex<false> {
  template <typename T>
  EIGEN_DEVICE_FUNC
  std::complex<T> operator() (const std::complex<T>& val) const { return val; }
};

template <int ResultType> struct PartOf {
  template <typename T> T operator() (const T& val) const { return val; }
};

template <> struct PartOf<RealPart> {
  template <typename T> T operator() (const std::complex<T>& val) const { return val.real(); }
};

template <> struct PartOf<ImagPart> {
  template <typename T> T operator() (const std::complex<T>& val) const { return val.imag(); }
};

namespace internal {
template <typename FFT, typename XprType, int FFTResultType, int FFTDir>
struct traits<TensorFFTOp<FFT, XprType, FFTResultType, FFTDir> > : public traits<XprType> {
  typedef traits<XprType> XprTraits;
  typedef typename NumTraits<typename XprTraits::Scalar>::Real RealScalar;
  typedef typename std::complex<RealScalar> ComplexScalar;
  typedef typename XprTraits::Scalar InputScalar;
  typedef typename conditional<FFTResultType == RealPart || FFTResultType == ImagPart, RealScalar, ComplexScalar>::type OutputScalar;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = XprTraits::NumDimensions;
  static const int Layout = XprTraits::Layout;
};

template <typename FFT, typename XprType, int FFTResultType, int FFTDirection>
struct eval<TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection>, Eigen::Dense> {
  typedef const TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection>& type;
};

template <typename FFT, typename XprType, int FFTResultType, int FFTDirection>
struct nested<TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection>, 1, typename eval<TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection> >::type> {
  typedef TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection> type;
};

}  // end namespace internal

template <typename FFT, typename XprType, int FFTResultType, int FFTDir>
class TensorFFTOp : public TensorBase<TensorFFTOp<FFT, XprType, FFTResultType, FFTDir>, ReadOnlyAccessors> {
 public:
  typedef typename Eigen::internal::traits<TensorFFTOp>::Scalar Scalar;
  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
  typedef typename std::complex<RealScalar> ComplexScalar;
  typedef typename internal::conditional<FFTResultType == RealPart || FFTResultType == ImagPart, RealScalar, ComplexScalar>::type OutputScalar;
  typedef OutputScalar CoeffReturnType;
  typedef typename Eigen::internal::nested<TensorFFTOp>::type Nested;
  typedef typename Eigen::internal::traits<TensorFFTOp>::StorageKind StorageKind;
  typedef typename Eigen::internal::traits<TensorFFTOp>::Index Index;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorFFTOp(const XprType& expr, const FFT& fft)
      : m_xpr(expr), m_fft(fft) {}

  EIGEN_DEVICE_FUNC
  const FFT& fft() const { return m_fft; }

  EIGEN_DEVICE_FUNC
  const typename internal::remove_all<typename XprType::Nested>::type& expression() const {
    return m_xpr;
  }

 protected:
  typename XprType::Nested m_xpr;
  const FFT m_fft;
};

// Eval as rvalue
template <typename FFT, typename ArgType, typename Device, int FFTResultType, int FFTDir>
struct TensorEvaluator<const TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir>, Device> {
  typedef TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir> XprType;
  typedef typename XprType::Index Index;
  static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
  typedef typename std::complex<RealScalar> ComplexScalar;
  typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions;
  typedef internal::traits<XprType> XprTraits;
  typedef typename XprTraits::Scalar InputScalar;
  typedef typename internal::conditional<FFTResultType == RealPart || FFTResultType == ImagPart, RealScalar, ComplexScalar>::type OutputScalar;
  typedef OutputScalar CoeffReturnType;
  typedef typename PacketType<OutputScalar, Device>::type PacketReturnType;
  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;

  enum {
    IsAligned = false,
    PacketAccess = true,
    BlockAccess = false,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_fft(op.fft()), m_impl(op.expression(), device), m_data(NULL), m_device(device) {
    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
    for (int i = 0; i < NumDims; ++i) {
      eigen_assert(input_dims[i] > 0);
      m_dimensions[i] = input_dims[i];
    }

    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_strides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_strides[i] = m_strides[i - 1] * m_dimensions[i - 1];
      }
    } else {
      m_strides[NumDims - 1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_strides[i] = m_strides[i + 1] * m_dimensions[i + 1];
      }
    }
    m_size = m_dimensions.TotalSize();
  }
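
  // Worked example for the stride setup above (added for clarity): for a
  // ColMajor tensor with dimensions [2, 3, 4] the constructor yields
  // m_strides = [1, 2, 6], so coordinates (i, j, k) map to the flat index
  // i + 2 * j + 6 * k; with RowMajor the strides are [12, 4, 1] instead.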

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const {
    return m_dimensions;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(OutputScalar* data) {
    m_impl.evalSubExprsIfNeeded(NULL);
    if (data) {
      evalToBuf(data);
      return false;
    } else {
      m_data = (CoeffReturnType*)m_device.allocate(sizeof(CoeffReturnType) * m_size);
      evalToBuf(m_data);
      return true;
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
    if (m_data) {
      m_device.deallocate(m_data);
      m_data = NULL;
    }
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffReturnType coeff(Index index) const {
    return m_data[index];
  }

  template <int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketReturnType packet(Index index) const {
    return internal::ploadt<PacketReturnType, LoadMode>(m_data + index);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize);
  }

  EIGEN_DEVICE_FUNC Scalar* data() const { return m_data; }

 private:
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalToBuf(OutputScalar* data) {
    const bool write_to_out = internal::is_same<OutputScalar, ComplexScalar>::value;
    ComplexScalar* buf = write_to_out ? (ComplexScalar*)data : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * m_size);

    for (Index i = 0; i < m_size; ++i) {
      buf[i] = MakeComplex<internal::is_same<InputScalar, RealScalar>::value>()(m_impl.coeff(i));
    }

    for (size_t i = 0; i < m_fft.size(); ++i) {
      Index dim = m_fft[i];
      eigen_assert(dim >= 0 && dim < NumDims);
      Index line_len = m_dimensions[dim];
      eigen_assert(line_len >= 1);
      ComplexScalar* line_buf = (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * line_len);
      const bool is_power_of_two = isPowerOfTwo(line_len);
      const Index good_composite = is_power_of_two ? 0 : findGoodComposite(line_len);
      const Index log_len = is_power_of_two ? getLog2(line_len) : getLog2(good_composite);

      ComplexScalar* a = is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * good_composite);
      ComplexScalar* b = is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * good_composite);
      ComplexScalar* pos_j_base_powered = is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * (line_len + 1));
      if (!is_power_of_two) {
        // Compute the twiddle factors
        //   t_n = exp(sqrt(-1) * pi * n^2 / line_len)
        // for n = 0, 1, ..., line_len.
        // For n > 2 we use the recurrence t_n = t_{n-1}^2 / t_{n-2} * t_1^2.
        pos_j_base_powered[0] = ComplexScalar(1, 0);
        if (line_len > 1) {
          const RealScalar pi_over_len(EIGEN_PI / line_len);
          const ComplexScalar pos_j_base = ComplexScalar(std::cos(pi_over_len), std::sin(pi_over_len));
          pos_j_base_powered[1] = pos_j_base;
          if (line_len > 2) {
            const ComplexScalar pos_j_base_sq = pos_j_base * pos_j_base;
            for (int j = 2; j < line_len + 1; ++j) {
              pos_j_base_powered[j] = pos_j_base_powered[j - 1] * pos_j_base_powered[j - 1] /
                                      pos_j_base_powered[j - 2] * pos_j_base_sq;
            }
          }
        }
      }
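
      // Verification of the recurrence above (added note): with
      // t_n = exp(i * pi * n^2 / line_len), the exponents satisfy
      //   2 * (n-1)^2 - (n-2)^2 + 2 = n^2,
      // so t_{n-1}^2 / t_{n-2} * t_1^2 = t_n. This replaces one cos/sin
      // evaluation per entry with a few complex multiplies.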

      for (Index partial_index = 0; partial_index < m_size / line_len; ++partial_index) {
        const Index base_offset = getBaseOffsetFromIndex(partial_index, dim);

        // get data into line_buf
        const Index stride = m_strides[dim];
        if (stride == 1) {
          memcpy(line_buf, &buf[base_offset], line_len * sizeof(ComplexScalar));
        } else {
          Index offset = base_offset;
          for (int j = 0; j < line_len; ++j, offset += stride) {
            line_buf[j] = buf[offset];
          }
        }

        // process the line
        if (is_power_of_two) {
          processDataLineCooleyTukey(line_buf, line_len, log_len);
        } else {
          processDataLineBluestein(line_buf, line_len, good_composite, log_len, a, b, pos_j_base_powered);
        }

        // write back
        if (FFTDir == FFT_FORWARD && stride == 1) {
          memcpy(&buf[base_offset], line_buf, line_len * sizeof(ComplexScalar));
        } else {
          Index offset = base_offset;
          const ComplexScalar div_factor = ComplexScalar(1.0 / line_len, 0);
          for (int j = 0; j < line_len; ++j, offset += stride) {
            buf[offset] = (FFTDir == FFT_FORWARD) ? line_buf[j] : line_buf[j] * div_factor;
          }
        }
      }
      m_device.deallocate(line_buf);
      if (!is_power_of_two) {
        m_device.deallocate(a);
        m_device.deallocate(b);
        m_device.deallocate(pos_j_base_powered);
      }
    }

    if (!write_to_out) {
      for (Index i = 0; i < m_size; ++i) {
        data[i] = PartOf<FFTResultType>()(buf[i]);
      }
      m_device.deallocate(buf);
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static bool isPowerOfTwo(Index x) {
    eigen_assert(x > 0);
    return !(x & (x - 1));
  }

  // The composite number for padding, used in Bluestein's FFT algorithm:
  // the smallest power of two that is at least 2 * n - 1.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Index findGoodComposite(Index n) {
    Index i = 2;
    while (i < 2 * n - 1) i *= 2;
    return i;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Index getLog2(Index m) {
    Index log2m = 0;
    while (m >>= 1) log2m++;
    return log2m;
  }

  // Call the Cooley-Tukey algorithm directly; the data length must be a power of 2.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void processDataLineCooleyTukey(ComplexScalar* line_buf, Index line_len, Index log_len) {
    eigen_assert(isPowerOfTwo(line_len));
    scramble_FFT(line_buf, line_len);
    compute_1D_Butterfly<FFTDir>(line_buf, line_len, log_len);
  }
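
  // Background for processDataLineBluestein below (added note): Bluestein's
  // trick rewrites the DFT exponent via n * k = (n^2 + k^2 - (k - n)^2) / 2,
  // so for the forward direction
  //   X_k = conj(t_k) * sum_n (x_n * conj(t_n)) * t_{k-n},  t_n = exp(i*pi*n^2/N).
  // That is a chirp multiplication, a linear convolution with the chirp, and a
  // final chirp multiplication. The convolution is zero-padded to the
  // power-of-two length m from findGoodComposite() and evaluated with two
  // forward FFTs, a pointwise product, and one inverse FFT of length m.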

  // Call Bluestein's FFT algorithm; m is a good composite number greater than
  // (2 * n - 1), used as the padding length.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void processDataLineBluestein(ComplexScalar* line_buf, Index line_len, Index good_composite, Index log_len, ComplexScalar* a, ComplexScalar* b, const ComplexScalar* pos_j_base_powered) {
    Index n = line_len;
    Index m = good_composite;
    ComplexScalar* data = line_buf;

    for (Index i = 0; i < n; ++i) {
      if (FFTDir == FFT_FORWARD) {
        a[i] = data[i] * numext::conj(pos_j_base_powered[i]);
      } else {
        a[i] = data[i] * pos_j_base_powered[i];
      }
    }
    for (Index i = n; i < m; ++i) {
      a[i] = ComplexScalar(0, 0);
    }

    for (Index i = 0; i < n; ++i) {
      if (FFTDir == FFT_FORWARD) {
        b[i] = pos_j_base_powered[i];
      } else {
        b[i] = numext::conj(pos_j_base_powered[i]);
      }
    }
    for (Index i = n; i < m - n; ++i) {
      b[i] = ComplexScalar(0, 0);
    }
    for (Index i = m - n; i < m; ++i) {
      if (FFTDir == FFT_FORWARD) {
        b[i] = pos_j_base_powered[m - i];
      } else {
        b[i] = numext::conj(pos_j_base_powered[m - i]);
      }
    }

    scramble_FFT(a, m);
    compute_1D_Butterfly<FFT_FORWARD>(a, m, log_len);

    scramble_FFT(b, m);
    compute_1D_Butterfly<FFT_FORWARD>(b, m, log_len);

    for (Index i = 0; i < m; ++i) {
      a[i] *= b[i];
    }

    scramble_FFT(a, m);
    compute_1D_Butterfly<FFT_REVERSE>(a, m, log_len);

    // Do the scaling after the ifft
    for (Index i = 0; i < m; ++i) {
      a[i] /= m;
    }

    for (Index i = 0; i < n; ++i) {
      if (FFTDir == FFT_FORWARD) {
        data[i] = a[i] * numext::conj(pos_j_base_powered[i]);
      } else {
        data[i] = a[i] * pos_j_base_powered[i];
      }
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static void scramble_FFT(ComplexScalar* data, Index n) {
    eigen_assert(isPowerOfTwo(n));
    Index j = 1;
    for (Index i = 1; i < n; ++i) {
      if (j > i) {
        std::swap(data[j - 1], data[i - 1]);
      }
      Index m = n >> 1;
      while (m >= 2 && j > m) {
        j -= m;
        m >>= 1;
      }
      j += m;
    }
  }
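
  // Note on scramble_FFT above (added): this is the standard in-place
  // bit-reversal permutation that precedes an iterative radix-2 FFT. For n = 8
  // it swaps data[1] <-> data[4] (binary 001 <-> 100) and data[3] <-> data[6]
  // (011 <-> 110); the bit-palindromic indices 0, 2, 5, and 7 stay in place.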

  template <int Dir>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void butterfly_2(ComplexScalar* data) {
    ComplexScalar tmp = data[1];
    data[1] = data[0] - data[1];
    data[0] += tmp;
  }

  template <int Dir>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void butterfly_4(ComplexScalar* data) {
    ComplexScalar tmp[4];
    tmp[0] = data[0] + data[1];
    tmp[1] = data[0] - data[1];
    tmp[2] = data[2] + data[3];
    if (Dir == FFT_FORWARD) {
      tmp[3] = ComplexScalar(0.0, -1.0) * (data[2] - data[3]);
    } else {
      tmp[3] = ComplexScalar(0.0, 1.0) * (data[2] - data[3]);
    }
    data[0] = tmp[0] + tmp[2];
    data[1] = tmp[1] + tmp[3];
    data[2] = tmp[0] - tmp[2];
    data[3] = tmp[1] - tmp[3];
  }

  template <int Dir>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void butterfly_8(ComplexScalar* data) {
    ComplexScalar tmp_1[8];
    ComplexScalar tmp_2[8];

    tmp_1[0] = data[0] + data[1];
    tmp_1[1] = data[0] - data[1];
    tmp_1[2] = data[2] + data[3];
    if (Dir == FFT_FORWARD) {
      tmp_1[3] = (data[2] - data[3]) * ComplexScalar(0, -1);
    } else {
      tmp_1[3] = (data[2] - data[3]) * ComplexScalar(0, 1);
    }
    tmp_1[4] = data[4] + data[5];
    tmp_1[5] = data[4] - data[5];
    tmp_1[6] = data[6] + data[7];
    if (Dir == FFT_FORWARD) {
      tmp_1[7] = (data[6] - data[7]) * ComplexScalar(0, -1);
    } else {
      tmp_1[7] = (data[6] - data[7]) * ComplexScalar(0, 1);
    }
    tmp_2[0] = tmp_1[0] + tmp_1[2];
    tmp_2[1] = tmp_1[1] + tmp_1[3];
    tmp_2[2] = tmp_1[0] - tmp_1[2];
    tmp_2[3] = tmp_1[1] - tmp_1[3];
    tmp_2[4] = tmp_1[4] + tmp_1[6];
    // SQRT2DIV2 = sqrt(2)/2
    #define SQRT2DIV2 0.7071067811865476
    if (Dir == FFT_FORWARD) {
      tmp_2[5] = (tmp_1[5] + tmp_1[7]) * ComplexScalar(SQRT2DIV2, -SQRT2DIV2);
      tmp_2[6] = (tmp_1[4] - tmp_1[6]) * ComplexScalar(0, -1);
      tmp_2[7] = (tmp_1[5] - tmp_1[7]) * ComplexScalar(-SQRT2DIV2, -SQRT2DIV2);
    } else {
      tmp_2[5] = (tmp_1[5] + tmp_1[7]) * ComplexScalar(SQRT2DIV2, SQRT2DIV2);
      tmp_2[6] = (tmp_1[4] - tmp_1[6]) * ComplexScalar(0, 1);
      tmp_2[7] = (tmp_1[5] - tmp_1[7]) * ComplexScalar(-SQRT2DIV2, SQRT2DIV2);
    }
    data[0] = tmp_2[0] + tmp_2[4];
    data[1] = tmp_2[1] + tmp_2[5];
    data[2] = tmp_2[2] + tmp_2[6];
    data[3] = tmp_2[3] + tmp_2[7];
    data[4] = tmp_2[0] - tmp_2[4];
    data[5] = tmp_2[1] - tmp_2[5];
    data[6] = tmp_2[2] - tmp_2[6];
    data[7] = tmp_2[3] - tmp_2[7];
  }

  template <int Dir>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void butterfly_1D_merge(
      ComplexScalar* data, Index n, Index n_power_of_2) {
    // Original code:
    // RealScalar wtemp = std::sin(M_PI/n);
    // RealScalar wpi = -std::sin(2 * M_PI/n);
    const RealScalar wtemp = m_sin_PI_div_n_LUT[n_power_of_2];
    const RealScalar wpi = (Dir == FFT_FORWARD)
                               ? m_minus_sin_2_PI_div_n_LUT[n_power_of_2]
                               : -m_minus_sin_2_PI_div_n_LUT[n_power_of_2];

    const ComplexScalar wp(wtemp, wpi);
    const ComplexScalar wp_one = wp + ComplexScalar(1, 0);
    const ComplexScalar wp_one_2 = wp_one * wp_one;
    const ComplexScalar wp_one_3 = wp_one_2 * wp_one;
    const ComplexScalar wp_one_4 = wp_one_3 * wp_one;
    const Index n2 = n / 2;
    ComplexScalar w(1.0, 0.0);
    for (Index i = 0; i < n2; i += 4) {
      ComplexScalar temp0(data[i + n2] * w);
      ComplexScalar temp1(data[i + 1 + n2] * w * wp_one);
      ComplexScalar temp2(data[i + 2 + n2] * w * wp_one_2);
      ComplexScalar temp3(data[i + 3 + n2] * w * wp_one_3);
      w = w * wp_one_4;

      data[i + n2] = data[i] - temp0;
      data[i] += temp0;

      data[i + 1 + n2] = data[i + 1] - temp1;
      data[i + 1] += temp1;

      data[i + 2 + n2] = data[i + 2] - temp2;
      data[i + 2] += temp2;

      data[i + 3 + n2] = data[i + 3] - temp3;
      data[i + 3] += temp3;
    }
  }

  template <int Dir>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void compute_1D_Butterfly(
      ComplexScalar* data, Index n, Index n_power_of_2) {
    eigen_assert(isPowerOfTwo(n));
    if (n > 8) {
      compute_1D_Butterfly<Dir>(data, n / 2, n_power_of_2 - 1);
      compute_1D_Butterfly<Dir>(data + n / 2, n / 2, n_power_of_2 - 1);
      butterfly_1D_merge<Dir>(data, n, n_power_of_2);
    } else if (n == 8) {
      butterfly_8<Dir>(data);
    } else if (n == 4) {
      butterfly_4<Dir>(data);
    } else if (n == 2) {
      butterfly_2<Dir>(data);
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index getBaseOffsetFromIndex(Index index, Index omitted_dim) const {
    Index result = 0;

    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i > omitted_dim; --i) {
        const Index partial_m_stride = m_strides[i] / m_dimensions[omitted_dim];
        const Index idx = index / partial_m_stride;
        index -= idx * partial_m_stride;
        result += idx * m_strides[i];
      }
      result += index;
    } else {
      for (Index i = 0; i < omitted_dim; ++i) {
        const Index partial_m_stride = m_strides[i] / m_dimensions[omitted_dim];
        const Index idx = index / partial_m_stride;
        index -= idx * partial_m_stride;
        result += idx * m_strides[i];
      }
      result += index;
    }
    // The value of index_coords[omitted_dim] is not determined at this step.
    return result;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index getIndexFromOffset(Index base, Index omitted_dim, Index offset) const {
    Index result = base + offset * m_strides[omitted_dim];
    return result;
  }

 protected:
  Index m_size;
  const FFT& m_fft;
  Dimensions m_dimensions;
  array<Index, NumDims> m_strides;
  TensorEvaluator<ArgType, Device> m_impl;
  CoeffReturnType* m_data;
  const Device& m_device;
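
  // Rationale for the two lookup tables below (added note): butterfly_1D_merge
  // advances its twiddle factor as w *= (1 + wp), where for the forward
  // direction wp = (-2 * sin^2(pi/n), -sin(2 * pi/n)). Since
  //   1 - 2 * sin^2(pi/n) = cos(2 * pi/n),
  // the factor (1 + wp) equals exp(-2 * pi * i / n) exactly, so tabulating the
  // two sine terms for n = 2^i removes all run-time sin/cos calls.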

  // One LUT entry per power of two, supporting line lengths up to 2^31 for
  // each dimension.
  // m_sin_PI_div_n_LUT[i] = -2 * std::pow(std::sin(M_PI / std::pow(2, i)), 2)
  const RealScalar m_sin_PI_div_n_LUT[32] = {
    RealScalar(0.0),
    RealScalar(-2),
    RealScalar(-0.999999999999999),
    RealScalar(-0.292893218813453),
    RealScalar(-0.0761204674887130),
    RealScalar(-0.0192147195967696),
    RealScalar(-0.00481527332780311),
    RealScalar(-0.00120454379482761),
    RealScalar(-3.01181303795779e-04),
    RealScalar(-7.52981608554592e-05),
    RealScalar(-1.88247173988574e-05),
    RealScalar(-4.70619042382852e-06),
    RealScalar(-1.17654829809007e-06),
    RealScalar(-2.94137117780840e-07),
    RealScalar(-7.35342821488550e-08),
    RealScalar(-1.83835707061916e-08),
    RealScalar(-4.59589268710903e-09),
    RealScalar(-1.14897317243732e-09),
    RealScalar(-2.87243293150586e-10),
    RealScalar(-7.18108232902250e-11),
    RealScalar(-1.79527058227174e-11),
    RealScalar(-4.48817645568941e-12),
    RealScalar(-1.12204411392298e-12),
    RealScalar(-2.80511028480785e-13),
    RealScalar(-7.01277571201985e-14),
    RealScalar(-1.75319392800498e-14),
    RealScalar(-4.38298482001247e-15),
    RealScalar(-1.09574620500312e-15),
    RealScalar(-2.73936551250781e-16),
    RealScalar(-6.84841378126949e-17),
    RealScalar(-1.71210344531737e-17),
    RealScalar(-4.28025861329343e-18)
  };

  // m_minus_sin_2_PI_div_n_LUT[i] = -std::sin(2 * M_PI / std::pow(2, i));
  const RealScalar m_minus_sin_2_PI_div_n_LUT[32] = {
    RealScalar(0.0),
    RealScalar(0.0),
    RealScalar(-1.00000000000000e+00),
    RealScalar(-7.07106781186547e-01),
    RealScalar(-3.82683432365090e-01),
    RealScalar(-1.95090322016128e-01),
    RealScalar(-9.80171403295606e-02),
    RealScalar(-4.90676743274180e-02),
    RealScalar(-2.45412285229123e-02),
    RealScalar(-1.22715382857199e-02),
    RealScalar(-6.13588464915448e-03),
    RealScalar(-3.06795676296598e-03),
    RealScalar(-1.53398018628477e-03),
    RealScalar(-7.66990318742704e-04),
    RealScalar(-3.83495187571396e-04),
    RealScalar(-1.91747597310703e-04),
    RealScalar(-9.58737990959773e-05),
    RealScalar(-4.79368996030669e-05),
    RealScalar(-2.39684498084182e-05),
    RealScalar(-1.19842249050697e-05),
    RealScalar(-5.99211245264243e-06),
    RealScalar(-2.99605622633466e-06),
    RealScalar(-1.49802811316901e-06),
    RealScalar(-7.49014056584716e-07),
    RealScalar(-3.74507028292384e-07),
    RealScalar(-1.87253514146195e-07),
    RealScalar(-9.36267570730981e-08),
    RealScalar(-4.68133785365491e-08),
    RealScalar(-2.34066892682746e-08),
    RealScalar(-1.17033446341373e-08),
    RealScalar(-5.85167231706864e-09),
    RealScalar(-2.92583615853432e-09)
  };
};

}  // end namespace Eigen

#endif  // __cplusplus >= 201103L || EIGEN_COMP_MSVC >= 1900

#endif  // EIGEN_CXX11_TENSOR_TENSOR_FFT_H