• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
5 // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
6 //
7 // This Source Code Form is subject to the terms of the Mozilla
8 // Public License v. 2.0. If a copy of the MPL was not distributed
9 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10 
11 #include "main.h"
12 #include "unsupported/Eigen/SpecialFunctions"
13 
14 #if defined __GNUC__ && __GNUC__>=6
15   #pragma GCC diagnostic ignored "-Wignored-attributes"
16 #endif
17 // using namespace Eigen;
18 
19 #ifdef EIGEN_VECTORIZE_SSE
20 const bool g_vectorize_sse = true;
21 #else
22 const bool g_vectorize_sse = false;
23 #endif
24 
25 namespace Eigen {
26 namespace internal {
negate(const T & x)27 template<typename T> T negate(const T& x) { return -x; }
28 }
29 }
30 
31 // NOTE: we disbale inlining for this function to workaround a GCC issue when using -O3 and the i387 FPU.
32 template<typename Scalar> EIGEN_DONT_INLINE
isApproxAbs(const Scalar & a,const Scalar & b,const typename NumTraits<Scalar>::Real & refvalue)33 bool isApproxAbs(const Scalar& a, const Scalar& b, const typename NumTraits<Scalar>::Real& refvalue)
34 {
35   return internal::isMuchSmallerThan(a-b, refvalue);
36 }
37 
areApproxAbs(const Scalar * a,const Scalar * b,int size,const typename NumTraits<Scalar>::Real & refvalue)38 template<typename Scalar> bool areApproxAbs(const Scalar* a, const Scalar* b, int size, const typename NumTraits<Scalar>::Real& refvalue)
39 {
40   for (int i=0; i<size; ++i)
41   {
42     if (!isApproxAbs(a[i],b[i],refvalue))
43     {
44       std::cout << "ref: [" << Map<const Matrix<Scalar,1,Dynamic> >(a,size) << "]" << " != vec: [" << Map<const Matrix<Scalar,1,Dynamic> >(b,size) << "]\n";
45       return false;
46     }
47   }
48   return true;
49 }
50 
areApprox(const Scalar * a,const Scalar * b,int size)51 template<typename Scalar> bool areApprox(const Scalar* a, const Scalar* b, int size)
52 {
53   for (int i=0; i<size; ++i)
54   {
55     if (a[i]!=b[i] && !internal::isApprox(a[i],b[i]))
56     {
57       std::cout << "ref: [" << Map<const Matrix<Scalar,1,Dynamic> >(a,size) << "]" << " != vec: [" << Map<const Matrix<Scalar,1,Dynamic> >(b,size) << "]\n";
58       return false;
59     }
60   }
61   return true;
62 }
63 
64 #define CHECK_CWISE1(REFOP, POP) { \
65   for (int i=0; i<PacketSize; ++i) \
66     ref[i] = REFOP(data1[i]); \
67   internal::pstore(data2, POP(internal::pload<Packet>(data1))); \
68   VERIFY(areApprox(ref, data2, PacketSize) && #POP); \
69 }
70 
71 template<bool Cond,typename Packet>
72 struct packet_helper
73 {
74   template<typename T>
loadpacket_helper75   inline Packet load(const T* from) const { return internal::pload<Packet>(from); }
76 
77   template<typename T>
storepacket_helper78   inline void store(T* to, const Packet& x) const { internal::pstore(to,x); }
79 };
80 
81 template<typename Packet>
82 struct packet_helper<false,Packet>
83 {
84   template<typename T>
loadpacket_helper85   inline T load(const T* from) const { return *from; }
86 
87   template<typename T>
storepacket_helper88   inline void store(T* to, const T& x) const { *to = x; }
89 };
90 
91 #define CHECK_CWISE1_IF(COND, REFOP, POP) if(COND) { \
92   packet_helper<COND,Packet> h; \
93   for (int i=0; i<PacketSize; ++i) \
94     ref[i] = REFOP(data1[i]); \
95   h.store(data2, POP(h.load(data1))); \
96   VERIFY(areApprox(ref, data2, PacketSize) && #POP); \
97 }
98 
99 #define CHECK_CWISE2_IF(COND, REFOP, POP) if(COND) { \
100   packet_helper<COND,Packet> h; \
101   for (int i=0; i<PacketSize; ++i) \
102     ref[i] = REFOP(data1[i], data1[i+PacketSize]); \
103   h.store(data2, POP(h.load(data1),h.load(data1+PacketSize))); \
104   VERIFY(areApprox(ref, data2, PacketSize) && #POP); \
105 }
106 
107 #define REF_ADD(a,b) ((a)+(b))
108 #define REF_SUB(a,b) ((a)-(b))
109 #define REF_MUL(a,b) ((a)*(b))
110 #define REF_DIV(a,b) ((a)/(b))
111 
packetmath()112 template<typename Scalar> void packetmath()
113 {
114   using std::abs;
115   typedef internal::packet_traits<Scalar> PacketTraits;
116   typedef typename PacketTraits::type Packet;
117   const int PacketSize = PacketTraits::size;
118   typedef typename NumTraits<Scalar>::Real RealScalar;
119 
120   const int max_size = PacketSize > 4 ? PacketSize : 4;
121   const int size = PacketSize*max_size;
122   EIGEN_ALIGN_MAX Scalar data1[size];
123   EIGEN_ALIGN_MAX Scalar data2[size];
124   EIGEN_ALIGN_MAX Packet packets[PacketSize*2];
125   EIGEN_ALIGN_MAX Scalar ref[size];
126   RealScalar refvalue = 0;
127   for (int i=0; i<size; ++i)
128   {
129     data1[i] = internal::random<Scalar>()/RealScalar(PacketSize);
130     data2[i] = internal::random<Scalar>()/RealScalar(PacketSize);
131     refvalue = (std::max)(refvalue,abs(data1[i]));
132   }
133 
134   internal::pstore(data2, internal::pload<Packet>(data1));
135   VERIFY(areApprox(data1, data2, PacketSize) && "aligned load/store");
136 
137   for (int offset=0; offset<PacketSize; ++offset)
138   {
139     internal::pstore(data2, internal::ploadu<Packet>(data1+offset));
140     VERIFY(areApprox(data1+offset, data2, PacketSize) && "internal::ploadu");
141   }
142 
143   for (int offset=0; offset<PacketSize; ++offset)
144   {
145     internal::pstoreu(data2+offset, internal::pload<Packet>(data1));
146     VERIFY(areApprox(data1, data2+offset, PacketSize) && "internal::pstoreu");
147   }
148 
149   for (int offset=0; offset<PacketSize; ++offset)
150   {
151     packets[0] = internal::pload<Packet>(data1);
152     packets[1] = internal::pload<Packet>(data1+PacketSize);
153          if (offset==0) internal::palign<0>(packets[0], packets[1]);
154     else if (offset==1) internal::palign<1>(packets[0], packets[1]);
155     else if (offset==2) internal::palign<2>(packets[0], packets[1]);
156     else if (offset==3) internal::palign<3>(packets[0], packets[1]);
157     else if (offset==4) internal::palign<4>(packets[0], packets[1]);
158     else if (offset==5) internal::palign<5>(packets[0], packets[1]);
159     else if (offset==6) internal::palign<6>(packets[0], packets[1]);
160     else if (offset==7) internal::palign<7>(packets[0], packets[1]);
161     else if (offset==8) internal::palign<8>(packets[0], packets[1]);
162     else if (offset==9) internal::palign<9>(packets[0], packets[1]);
163     else if (offset==10) internal::palign<10>(packets[0], packets[1]);
164     else if (offset==11) internal::palign<11>(packets[0], packets[1]);
165     else if (offset==12) internal::palign<12>(packets[0], packets[1]);
166     else if (offset==13) internal::palign<13>(packets[0], packets[1]);
167     else if (offset==14) internal::palign<14>(packets[0], packets[1]);
168     else if (offset==15) internal::palign<15>(packets[0], packets[1]);
169     internal::pstore(data2, packets[0]);
170 
171     for (int i=0; i<PacketSize; ++i)
172       ref[i] = data1[i+offset];
173 
174     VERIFY(areApprox(ref, data2, PacketSize) && "internal::palign");
175   }
176 
177   VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasAdd);
178   VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasSub);
179   VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasMul);
180   VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasNegate);
181   VERIFY((internal::is_same<Scalar,int>::value) || (!PacketTraits::Vectorizable) || PacketTraits::HasDiv);
182 
183   CHECK_CWISE2_IF(PacketTraits::HasAdd, REF_ADD,  internal::padd);
184   CHECK_CWISE2_IF(PacketTraits::HasSub, REF_SUB,  internal::psub);
185   CHECK_CWISE2_IF(PacketTraits::HasMul, REF_MUL,  internal::pmul);
186   CHECK_CWISE2_IF(PacketTraits::HasDiv, REF_DIV, internal::pdiv);
187 
188   CHECK_CWISE1(internal::negate, internal::pnegate);
189   CHECK_CWISE1(numext::conj, internal::pconj);
190 
191   for(int offset=0;offset<3;++offset)
192   {
193     for (int i=0; i<PacketSize; ++i)
194       ref[i] = data1[offset];
195     internal::pstore(data2, internal::pset1<Packet>(data1[offset]));
196     VERIFY(areApprox(ref, data2, PacketSize) && "internal::pset1");
197   }
198 
199   {
200     for (int i=0; i<PacketSize*4; ++i)
201       ref[i] = data1[i/PacketSize];
202     Packet A0, A1, A2, A3;
203     internal::pbroadcast4<Packet>(data1, A0, A1, A2, A3);
204     internal::pstore(data2+0*PacketSize, A0);
205     internal::pstore(data2+1*PacketSize, A1);
206     internal::pstore(data2+2*PacketSize, A2);
207     internal::pstore(data2+3*PacketSize, A3);
208     VERIFY(areApprox(ref, data2, 4*PacketSize) && "internal::pbroadcast4");
209   }
210 
211   {
212     for (int i=0; i<PacketSize*2; ++i)
213       ref[i] = data1[i/PacketSize];
214     Packet A0, A1;
215     internal::pbroadcast2<Packet>(data1, A0, A1);
216     internal::pstore(data2+0*PacketSize, A0);
217     internal::pstore(data2+1*PacketSize, A1);
218     VERIFY(areApprox(ref, data2, 2*PacketSize) && "internal::pbroadcast2");
219   }
220 
221   VERIFY(internal::isApprox(data1[0], internal::pfirst(internal::pload<Packet>(data1))) && "internal::pfirst");
222 
223   if(PacketSize>1)
224   {
225     for(int offset=0;offset<4;++offset)
226     {
227       for(int i=0;i<PacketSize/2;++i)
228         ref[2*i+0] = ref[2*i+1] = data1[offset+i];
229       internal::pstore(data2,internal::ploaddup<Packet>(data1+offset));
230       VERIFY(areApprox(ref, data2, PacketSize) && "ploaddup");
231     }
232   }
233 
234   if(PacketSize>2)
235   {
236     for(int offset=0;offset<4;++offset)
237     {
238       for(int i=0;i<PacketSize/4;++i)
239         ref[4*i+0] = ref[4*i+1] = ref[4*i+2] = ref[4*i+3] = data1[offset+i];
240       internal::pstore(data2,internal::ploadquad<Packet>(data1+offset));
241       VERIFY(areApprox(ref, data2, PacketSize) && "ploadquad");
242     }
243   }
244 
245   ref[0] = 0;
246   for (int i=0; i<PacketSize; ++i)
247     ref[0] += data1[i];
248   VERIFY(isApproxAbs(ref[0], internal::predux(internal::pload<Packet>(data1)), refvalue) && "internal::predux");
249 
250   {
251     for (int i=0; i<4; ++i)
252       ref[i] = 0;
253     for (int i=0; i<PacketSize; ++i)
254       ref[i%4] += data1[i];
255     internal::pstore(data2, internal::predux_downto4(internal::pload<Packet>(data1)));
256     VERIFY(areApprox(ref, data2, PacketSize>4?PacketSize/2:PacketSize) && "internal::predux_downto4");
257   }
258 
259   ref[0] = 1;
260   for (int i=0; i<PacketSize; ++i)
261     ref[0] *= data1[i];
262   VERIFY(internal::isApprox(ref[0], internal::predux_mul(internal::pload<Packet>(data1))) && "internal::predux_mul");
263 
264   for (int j=0; j<PacketSize; ++j)
265   {
266     ref[j] = 0;
267     for (int i=0; i<PacketSize; ++i)
268       ref[j] += data1[i+j*PacketSize];
269     packets[j] = internal::pload<Packet>(data1+j*PacketSize);
270   }
271   internal::pstore(data2, internal::preduxp(packets));
272   VERIFY(areApproxAbs(ref, data2, PacketSize, refvalue) && "internal::preduxp");
273 
274   for (int i=0; i<PacketSize; ++i)
275     ref[i] = data1[PacketSize-i-1];
276   internal::pstore(data2, internal::preverse(internal::pload<Packet>(data1)));
277   VERIFY(areApprox(ref, data2, PacketSize) && "internal::preverse");
278 
279   internal::PacketBlock<Packet> kernel;
280   for (int i=0; i<PacketSize; ++i) {
281     kernel.packet[i] = internal::pload<Packet>(data1+i*PacketSize);
282   }
283   ptranspose(kernel);
284   for (int i=0; i<PacketSize; ++i) {
285     internal::pstore(data2, kernel.packet[i]);
286     for (int j = 0; j < PacketSize; ++j) {
287       VERIFY(isApproxAbs(data2[j], data1[i+j*PacketSize], refvalue) && "ptranspose");
288     }
289   }
290 
291   if (PacketTraits::HasBlend) {
292     Packet thenPacket = internal::pload<Packet>(data1);
293     Packet elsePacket = internal::pload<Packet>(data2);
294     EIGEN_ALIGN_MAX internal::Selector<PacketSize> selector;
295     for (int i = 0; i < PacketSize; ++i) {
296       selector.select[i] = i;
297     }
298 
299     Packet blend = internal::pblend(selector, thenPacket, elsePacket);
300     EIGEN_ALIGN_MAX Scalar result[size];
301     internal::pstore(result, blend);
302     for (int i = 0; i < PacketSize; ++i) {
303       VERIFY(isApproxAbs(result[i], (selector.select[i] ? data1[i] : data2[i]), refvalue));
304     }
305   }
306 
307   if (PacketTraits::HasBlend || g_vectorize_sse) {
308     // pinsertfirst
309     for (int i=0; i<PacketSize; ++i)
310       ref[i] = data1[i];
311     Scalar s = internal::random<Scalar>();
312     ref[0] = s;
313     internal::pstore(data2, internal::pinsertfirst(internal::pload<Packet>(data1),s));
314     VERIFY(areApprox(ref, data2, PacketSize) && "internal::pinsertfirst");
315   }
316 
317   if (PacketTraits::HasBlend || g_vectorize_sse) {
318     // pinsertlast
319     for (int i=0; i<PacketSize; ++i)
320       ref[i] = data1[i];
321     Scalar s = internal::random<Scalar>();
322     ref[PacketSize-1] = s;
323     internal::pstore(data2, internal::pinsertlast(internal::pload<Packet>(data1),s));
324     VERIFY(areApprox(ref, data2, PacketSize) && "internal::pinsertlast");
325   }
326 }
327 
packetmath_real()328 template<typename Scalar> void packetmath_real()
329 {
330   using std::abs;
331   typedef internal::packet_traits<Scalar> PacketTraits;
332   typedef typename PacketTraits::type Packet;
333   const int PacketSize = PacketTraits::size;
334 
335   const int size = PacketSize*4;
336   EIGEN_ALIGN_MAX Scalar data1[PacketTraits::size*4];
337   EIGEN_ALIGN_MAX Scalar data2[PacketTraits::size*4];
338   EIGEN_ALIGN_MAX Scalar ref[PacketTraits::size*4];
339 
340   for (int i=0; i<size; ++i)
341   {
342     data1[i] = internal::random<Scalar>(-1,1) * std::pow(Scalar(10), internal::random<Scalar>(-3,3));
343     data2[i] = internal::random<Scalar>(-1,1) * std::pow(Scalar(10), internal::random<Scalar>(-3,3));
344   }
345   CHECK_CWISE1_IF(PacketTraits::HasSin, std::sin, internal::psin);
346   CHECK_CWISE1_IF(PacketTraits::HasCos, std::cos, internal::pcos);
347   CHECK_CWISE1_IF(PacketTraits::HasTan, std::tan, internal::ptan);
348 
349   CHECK_CWISE1_IF(PacketTraits::HasRound, numext::round, internal::pround);
350   CHECK_CWISE1_IF(PacketTraits::HasCeil, numext::ceil, internal::pceil);
351   CHECK_CWISE1_IF(PacketTraits::HasFloor, numext::floor, internal::pfloor);
352 
353   for (int i=0; i<size; ++i)
354   {
355     data1[i] = internal::random<Scalar>(-1,1);
356     data2[i] = internal::random<Scalar>(-1,1);
357   }
358   CHECK_CWISE1_IF(PacketTraits::HasASin, std::asin, internal::pasin);
359   CHECK_CWISE1_IF(PacketTraits::HasACos, std::acos, internal::pacos);
360 
361   for (int i=0; i<size; ++i)
362   {
363     data1[i] = internal::random<Scalar>(-87,88);
364     data2[i] = internal::random<Scalar>(-87,88);
365   }
366   CHECK_CWISE1_IF(PacketTraits::HasExp, std::exp, internal::pexp);
367   for (int i=0; i<size; ++i)
368   {
369     data1[i] = internal::random<Scalar>(-1,1) * std::pow(Scalar(10), internal::random<Scalar>(-6,6));
370     data2[i] = internal::random<Scalar>(-1,1) * std::pow(Scalar(10), internal::random<Scalar>(-6,6));
371   }
372   CHECK_CWISE1_IF(PacketTraits::HasTanh, std::tanh, internal::ptanh);
373   if(PacketTraits::HasExp && PacketTraits::size>=2)
374   {
375     data1[0] = std::numeric_limits<Scalar>::quiet_NaN();
376     data1[1] = std::numeric_limits<Scalar>::epsilon();
377     packet_helper<PacketTraits::HasExp,Packet> h;
378     h.store(data2, internal::pexp(h.load(data1)));
379     VERIFY((numext::isnan)(data2[0]));
380     VERIFY_IS_EQUAL(std::exp(std::numeric_limits<Scalar>::epsilon()), data2[1]);
381 
382     data1[0] = -std::numeric_limits<Scalar>::epsilon();
383     data1[1] = 0;
384     h.store(data2, internal::pexp(h.load(data1)));
385     VERIFY_IS_EQUAL(std::exp(-std::numeric_limits<Scalar>::epsilon()), data2[0]);
386     VERIFY_IS_EQUAL(std::exp(Scalar(0)), data2[1]);
387 
388     data1[0] = (std::numeric_limits<Scalar>::min)();
389     data1[1] = -(std::numeric_limits<Scalar>::min)();
390     h.store(data2, internal::pexp(h.load(data1)));
391     VERIFY_IS_EQUAL(std::exp((std::numeric_limits<Scalar>::min)()), data2[0]);
392     VERIFY_IS_EQUAL(std::exp(-(std::numeric_limits<Scalar>::min)()), data2[1]);
393 
394     data1[0] = std::numeric_limits<Scalar>::denorm_min();
395     data1[1] = -std::numeric_limits<Scalar>::denorm_min();
396     h.store(data2, internal::pexp(h.load(data1)));
397     VERIFY_IS_EQUAL(std::exp(std::numeric_limits<Scalar>::denorm_min()), data2[0]);
398     VERIFY_IS_EQUAL(std::exp(-std::numeric_limits<Scalar>::denorm_min()), data2[1]);
399   }
400 
401   if (PacketTraits::HasTanh) {
402     // NOTE this test migh fail with GCC prior to 6.3, see MathFunctionsImpl.h for details.
403     data1[0] = std::numeric_limits<Scalar>::quiet_NaN();
404     packet_helper<internal::packet_traits<Scalar>::HasTanh,Packet> h;
405     h.store(data2, internal::ptanh(h.load(data1)));
406     VERIFY((numext::isnan)(data2[0]));
407   }
408 
409 #if EIGEN_HAS_C99_MATH
410   {
411     data1[0] = std::numeric_limits<Scalar>::quiet_NaN();
412     packet_helper<internal::packet_traits<Scalar>::HasLGamma,Packet> h;
413     h.store(data2, internal::plgamma(h.load(data1)));
414     VERIFY((numext::isnan)(data2[0]));
415   }
416   {
417     data1[0] = std::numeric_limits<Scalar>::quiet_NaN();
418     packet_helper<internal::packet_traits<Scalar>::HasErf,Packet> h;
419     h.store(data2, internal::perf(h.load(data1)));
420     VERIFY((numext::isnan)(data2[0]));
421   }
422   {
423     data1[0] = std::numeric_limits<Scalar>::quiet_NaN();
424     packet_helper<internal::packet_traits<Scalar>::HasErfc,Packet> h;
425     h.store(data2, internal::perfc(h.load(data1)));
426     VERIFY((numext::isnan)(data2[0]));
427   }
428 #endif  // EIGEN_HAS_C99_MATH
429 
430   for (int i=0; i<size; ++i)
431   {
432     data1[i] = internal::random<Scalar>(0,1) * std::pow(Scalar(10), internal::random<Scalar>(-6,6));
433     data2[i] = internal::random<Scalar>(0,1) * std::pow(Scalar(10), internal::random<Scalar>(-6,6));
434   }
435 
436   if(internal::random<float>(0,1)<0.1f)
437     data1[internal::random<int>(0, PacketSize)] = 0;
438   CHECK_CWISE1_IF(PacketTraits::HasSqrt, std::sqrt, internal::psqrt);
439   CHECK_CWISE1_IF(PacketTraits::HasLog, std::log, internal::plog);
440 #if EIGEN_HAS_C99_MATH && (__cplusplus > 199711L)
441   CHECK_CWISE1_IF(PacketTraits::HasLog1p, std::log1p, internal::plog1p);
442   CHECK_CWISE1_IF(internal::packet_traits<Scalar>::HasLGamma, std::lgamma, internal::plgamma);
443   CHECK_CWISE1_IF(internal::packet_traits<Scalar>::HasErf, std::erf, internal::perf);
444   CHECK_CWISE1_IF(internal::packet_traits<Scalar>::HasErfc, std::erfc, internal::perfc);
445 #endif
446 
447   if(PacketTraits::HasLog && PacketTraits::size>=2)
448   {
449     data1[0] = std::numeric_limits<Scalar>::quiet_NaN();
450     data1[1] = std::numeric_limits<Scalar>::epsilon();
451     packet_helper<PacketTraits::HasLog,Packet> h;
452     h.store(data2, internal::plog(h.load(data1)));
453     VERIFY((numext::isnan)(data2[0]));
454     VERIFY_IS_EQUAL(std::log(std::numeric_limits<Scalar>::epsilon()), data2[1]);
455 
456     data1[0] = -std::numeric_limits<Scalar>::epsilon();
457     data1[1] = 0;
458     h.store(data2, internal::plog(h.load(data1)));
459     VERIFY((numext::isnan)(data2[0]));
460     VERIFY_IS_EQUAL(std::log(Scalar(0)), data2[1]);
461 
462     data1[0] = (std::numeric_limits<Scalar>::min)();
463     data1[1] = -(std::numeric_limits<Scalar>::min)();
464     h.store(data2, internal::plog(h.load(data1)));
465     VERIFY_IS_EQUAL(std::log((std::numeric_limits<Scalar>::min)()), data2[0]);
466     VERIFY((numext::isnan)(data2[1]));
467 
468     data1[0] = std::numeric_limits<Scalar>::denorm_min();
469     data1[1] = -std::numeric_limits<Scalar>::denorm_min();
470     h.store(data2, internal::plog(h.load(data1)));
471     // VERIFY_IS_EQUAL(std::log(std::numeric_limits<Scalar>::denorm_min()), data2[0]);
472     VERIFY((numext::isnan)(data2[1]));
473 
474     data1[0] = Scalar(-1.0f);
475     h.store(data2, internal::plog(h.load(data1)));
476     VERIFY((numext::isnan)(data2[0]));
477     h.store(data2, internal::psqrt(h.load(data1)));
478     VERIFY((numext::isnan)(data2[0]));
479     VERIFY((numext::isnan)(data2[1]));
480   }
481 }
482 
packetmath_notcomplex()483 template<typename Scalar> void packetmath_notcomplex()
484 {
485   using std::abs;
486   typedef internal::packet_traits<Scalar> PacketTraits;
487   typedef typename PacketTraits::type Packet;
488   const int PacketSize = PacketTraits::size;
489 
490   EIGEN_ALIGN_MAX Scalar data1[PacketTraits::size*4];
491   EIGEN_ALIGN_MAX Scalar data2[PacketTraits::size*4];
492   EIGEN_ALIGN_MAX Scalar ref[PacketTraits::size*4];
493 
494   Array<Scalar,Dynamic,1>::Map(data1, PacketTraits::size*4).setRandom();
495 
496   ref[0] = data1[0];
497   for (int i=0; i<PacketSize; ++i)
498     ref[0] = (std::min)(ref[0],data1[i]);
499   VERIFY(internal::isApprox(ref[0], internal::predux_min(internal::pload<Packet>(data1))) && "internal::predux_min");
500 
501   VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasMin);
502   VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasMax);
503 
504   CHECK_CWISE2_IF(PacketTraits::HasMin, (std::min), internal::pmin);
505   CHECK_CWISE2_IF(PacketTraits::HasMax, (std::max), internal::pmax);
506   CHECK_CWISE1(abs, internal::pabs);
507 
508   ref[0] = data1[0];
509   for (int i=0; i<PacketSize; ++i)
510     ref[0] = (std::max)(ref[0],data1[i]);
511   VERIFY(internal::isApprox(ref[0], internal::predux_max(internal::pload<Packet>(data1))) && "internal::predux_max");
512 
513   for (int i=0; i<PacketSize; ++i)
514     ref[i] = data1[0]+Scalar(i);
515   internal::pstore(data2, internal::plset<Packet>(data1[0]));
516   VERIFY(areApprox(ref, data2, PacketSize) && "internal::plset");
517 }
518 
test_conj_helper(Scalar * data1,Scalar * data2,Scalar * ref,Scalar * pval)519 template<typename Scalar,bool ConjLhs,bool ConjRhs> void test_conj_helper(Scalar* data1, Scalar* data2, Scalar* ref, Scalar* pval)
520 {
521   typedef internal::packet_traits<Scalar> PacketTraits;
522   typedef typename PacketTraits::type Packet;
523   const int PacketSize = PacketTraits::size;
524 
525   internal::conj_if<ConjLhs> cj0;
526   internal::conj_if<ConjRhs> cj1;
527   internal::conj_helper<Scalar,Scalar,ConjLhs,ConjRhs> cj;
528   internal::conj_helper<Packet,Packet,ConjLhs,ConjRhs> pcj;
529 
530   for(int i=0;i<PacketSize;++i)
531   {
532     ref[i] = cj0(data1[i]) * cj1(data2[i]);
533     VERIFY(internal::isApprox(ref[i], cj.pmul(data1[i],data2[i])) && "conj_helper pmul");
534   }
535   internal::pstore(pval,pcj.pmul(internal::pload<Packet>(data1),internal::pload<Packet>(data2)));
536   VERIFY(areApprox(ref, pval, PacketSize) && "conj_helper pmul");
537 
538   for(int i=0;i<PacketSize;++i)
539   {
540     Scalar tmp = ref[i];
541     ref[i] += cj0(data1[i]) * cj1(data2[i]);
542     VERIFY(internal::isApprox(ref[i], cj.pmadd(data1[i],data2[i],tmp)) && "conj_helper pmadd");
543   }
544   internal::pstore(pval,pcj.pmadd(internal::pload<Packet>(data1),internal::pload<Packet>(data2),internal::pload<Packet>(pval)));
545   VERIFY(areApprox(ref, pval, PacketSize) && "conj_helper pmadd");
546 }
547 
packetmath_complex()548 template<typename Scalar> void packetmath_complex()
549 {
550   typedef internal::packet_traits<Scalar> PacketTraits;
551   typedef typename PacketTraits::type Packet;
552   const int PacketSize = PacketTraits::size;
553 
554   const int size = PacketSize*4;
555   EIGEN_ALIGN_MAX Scalar data1[PacketSize*4];
556   EIGEN_ALIGN_MAX Scalar data2[PacketSize*4];
557   EIGEN_ALIGN_MAX Scalar ref[PacketSize*4];
558   EIGEN_ALIGN_MAX Scalar pval[PacketSize*4];
559 
560   for (int i=0; i<size; ++i)
561   {
562     data1[i] = internal::random<Scalar>() * Scalar(1e2);
563     data2[i] = internal::random<Scalar>() * Scalar(1e2);
564   }
565 
566   test_conj_helper<Scalar,false,false> (data1,data2,ref,pval);
567   test_conj_helper<Scalar,false,true>  (data1,data2,ref,pval);
568   test_conj_helper<Scalar,true,false>  (data1,data2,ref,pval);
569   test_conj_helper<Scalar,true,true>   (data1,data2,ref,pval);
570 
571   {
572     for(int i=0;i<PacketSize;++i)
573       ref[i] = Scalar(std::imag(data1[i]),std::real(data1[i]));
574     internal::pstore(pval,internal::pcplxflip(internal::pload<Packet>(data1)));
575     VERIFY(areApprox(ref, pval, PacketSize) && "pcplxflip");
576   }
577 }
578 
packetmath_scatter_gather()579 template<typename Scalar> void packetmath_scatter_gather()
580 {
581   typedef internal::packet_traits<Scalar> PacketTraits;
582   typedef typename PacketTraits::type Packet;
583   typedef typename NumTraits<Scalar>::Real RealScalar;
584   const int PacketSize = PacketTraits::size;
585   EIGEN_ALIGN_MAX Scalar data1[PacketSize];
586   RealScalar refvalue = 0;
587   for (int i=0; i<PacketSize; ++i) {
588     data1[i] = internal::random<Scalar>()/RealScalar(PacketSize);
589   }
590 
591   int stride = internal::random<int>(1,20);
592 
593   EIGEN_ALIGN_MAX Scalar buffer[PacketSize*20];
594   memset(buffer, 0, 20*PacketSize*sizeof(Scalar));
595   Packet packet = internal::pload<Packet>(data1);
596   internal::pscatter<Scalar, Packet>(buffer, packet, stride);
597 
598   for (int i = 0; i < PacketSize*20; ++i) {
599     if ((i%stride) == 0 && i<stride*PacketSize) {
600       VERIFY(isApproxAbs(buffer[i], data1[i/stride], refvalue) && "pscatter");
601     } else {
602       VERIFY(isApproxAbs(buffer[i], Scalar(0), refvalue) && "pscatter");
603     }
604   }
605 
606   for (int i=0; i<PacketSize*7; ++i) {
607     buffer[i] = internal::random<Scalar>()/RealScalar(PacketSize);
608   }
609   packet = internal::pgather<Scalar, Packet>(buffer, 7);
610   internal::pstore(data1, packet);
611   for (int i = 0; i < PacketSize; ++i) {
612     VERIFY(isApproxAbs(data1[i], buffer[i*7], refvalue) && "pgather");
613   }
614 }
615 
test_packetmath()616 void test_packetmath()
617 {
618   for(int i = 0; i < g_repeat; i++) {
619     CALL_SUBTEST_1( packetmath<float>() );
620     CALL_SUBTEST_2( packetmath<double>() );
621     CALL_SUBTEST_3( packetmath<int>() );
622     CALL_SUBTEST_4( packetmath<std::complex<float> >() );
623     CALL_SUBTEST_5( packetmath<std::complex<double> >() );
624 
625     CALL_SUBTEST_1( packetmath_notcomplex<float>() );
626     CALL_SUBTEST_2( packetmath_notcomplex<double>() );
627     CALL_SUBTEST_3( packetmath_notcomplex<int>() );
628 
629     CALL_SUBTEST_1( packetmath_real<float>() );
630     CALL_SUBTEST_2( packetmath_real<double>() );
631 
632     CALL_SUBTEST_4( packetmath_complex<std::complex<float> >() );
633     CALL_SUBTEST_5( packetmath_complex<std::complex<double> >() );
634 
635     CALL_SUBTEST_1( packetmath_scatter_gather<float>() );
636     CALL_SUBTEST_2( packetmath_scatter_gather<double>() );
637     CALL_SUBTEST_3( packetmath_scatter_gather<int>() );
638     CALL_SUBTEST_4( packetmath_scatter_gather<std::complex<float> >() );
639     CALL_SUBTEST_5( packetmath_scatter_gather<std::complex<double> >() );
640   }
641 }
642