1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
5 //
6 // This Source Code Form is subject to the terms of the Mozilla
7 // Public License v. 2.0. If a copy of the MPL was not distributed
8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
10 #ifndef EIGEN_CXX11_TENSOR_TENSOR_RANDOM_H
11 #define EIGEN_CXX11_TENSOR_TENSOR_RANDOM_H
12
13 namespace Eigen {
14 namespace internal {
15
16 namespace {
17
get_random_seed()18 EIGEN_DEVICE_FUNC uint64_t get_random_seed() {
19 #ifdef __CUDA_ARCH__
20 // We don't support 3d kernels since we currently only use 1 and
21 // 2d kernels.
22 assert(threadIdx.z == 0);
23 return clock64() +
24 blockIdx.x * blockDim.x + threadIdx.x +
25 gridDim.x * blockDim.x * (blockIdx.y * blockDim.y + threadIdx.y);
26
27 #elif defined _WIN32
28 // Use the current time as a baseline.
29 SYSTEMTIME st;
30 GetSystemTime(&st);
31 int time = st.wSecond + 1000 * st.wMilliseconds;
32 // Mix in a random number to make sure that we get different seeds if
33 // we try to generate seeds faster than the clock resolution.
34 // We need 2 random values since the generator only generate 16 bits at
35 // a time (https://msdn.microsoft.com/en-us/library/398ax69y.aspx)
36 int rnd1 = ::rand();
37 int rnd2 = ::rand();
38 uint64_t rnd = (rnd1 | rnd2 << 16) ^ time;
39 return rnd;
40
41 #elif defined __APPLE__
42 // Same approach as for win32, except that the random number generator
43 // is better (// https://developer.apple.com/legacy/library/documentation/Darwin/Reference/ManPages/man3/random.3.html#//apple_ref/doc/man/3/random).
44 uint64_t rnd = ::random() ^ mach_absolute_time();
45 return rnd;
46
47 #else
48 // Augment the current time with pseudo random number generation
49 // to ensure that we get different seeds if we try to generate seeds
50 // faster than the clock resolution.
51 timespec ts;
52 clock_gettime(CLOCK_REALTIME, &ts);
53 uint64_t rnd = ::random() ^ ts.tv_nsec;
54 return rnd;
55 #endif
56 }
57
PCG_XSH_RS_generator(uint64_t * state)58 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE unsigned PCG_XSH_RS_generator(uint64_t* state) {
59 // TODO: Unify with the implementation in the non blocking thread pool.
60 uint64_t current = *state;
61 // Update the internal state
62 *state = current * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL;
63 // Generate the random output (using the PCG-XSH-RS scheme)
64 return static_cast<unsigned>((current ^ (current >> 22)) >> (22 + (current >> 61)));
65 }
66
PCG_XSH_RS_state(uint64_t seed)67 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE uint64_t PCG_XSH_RS_state(uint64_t seed) {
68 seed = seed ? seed : get_random_seed();
69 return seed * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL;
70 }
71
72 } // namespace
73
74
75 template <typename T> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
RandomToTypeUniform(uint64_t * state)76 T RandomToTypeUniform(uint64_t* state) {
77 unsigned rnd = PCG_XSH_RS_generator(state);
78 return static_cast<T>(rnd);
79 }
80
81
82 template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
83 Eigen::half RandomToTypeUniform<Eigen::half>(uint64_t* state) {
84 Eigen::half result;
85 // Generate 10 random bits for the mantissa
86 unsigned rnd = PCG_XSH_RS_generator(state);
87 result.x = static_cast<uint16_t>(rnd & 0x3ffu);
88 // Set the exponent
89 result.x |= (static_cast<uint16_t>(15) << 10);
90 // Return the final result
91 return result - Eigen::half(1.0f);
92 }
93
94
95 template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
96 float RandomToTypeUniform<float>(uint64_t* state) {
97 typedef union {
98 uint32_t raw;
99 float fp;
100 } internal;
101 internal result;
102 // Generate 23 random bits for the mantissa mantissa
103 const unsigned rnd = PCG_XSH_RS_generator(state);
104 result.raw = rnd & 0x7fffffu;
105 // Set the exponent
106 result.raw |= (static_cast<uint32_t>(127) << 23);
107 // Return the final result
108 return result.fp - 1.0f;
109 }
110
111 template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
112 double RandomToTypeUniform<double>(uint64_t* state) {
113 typedef union {
114 uint64_t raw;
115 double dp;
116 } internal;
117 internal result;
118 result.raw = 0;
119 // Generate 52 random bits for the mantissa
120 // First generate the upper 20 bits
121 unsigned rnd1 = PCG_XSH_RS_generator(state) & 0xfffffu;
122 // The generate the lower 32 bits
123 unsigned rnd2 = PCG_XSH_RS_generator(state);
124 result.raw = (static_cast<uint64_t>(rnd1) << 32) | rnd2;
125 // Set the exponent
126 result.raw |= (static_cast<uint64_t>(1023) << 52);
127 // Return the final result
128 return result.dp - 1.0;
129 }
130
131 template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
132 std::complex<float> RandomToTypeUniform<std::complex<float> >(uint64_t* state) {
133 return std::complex<float>(RandomToTypeUniform<float>(state),
134 RandomToTypeUniform<float>(state));
135 }
136 template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
137 std::complex<double> RandomToTypeUniform<std::complex<double> >(uint64_t* state) {
138 return std::complex<double>(RandomToTypeUniform<double>(state),
139 RandomToTypeUniform<double>(state));
140 }
141
142 template <typename T> class UniformRandomGenerator {
143 public:
144 static const bool PacketAccess = true;
145
146 // Uses the given "seed" if non-zero, otherwise uses a random seed.
147 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE UniformRandomGenerator(
148 uint64_t seed = 0) {
149 m_state = PCG_XSH_RS_state(seed);
150 }
UniformRandomGenerator(const UniformRandomGenerator & other)151 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE UniformRandomGenerator(
152 const UniformRandomGenerator& other) {
153 m_state = other.m_state;
154 }
155
156 template<typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
operator()157 T operator()(Index i) const {
158 uint64_t local_state = m_state + i;
159 T result = RandomToTypeUniform<T>(&local_state);
160 m_state = local_state;
161 return result;
162 }
163
164 template<typename Packet, typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
packetOp(Index i)165 Packet packetOp(Index i) const {
166 const int packetSize = internal::unpacket_traits<Packet>::size;
167 EIGEN_ALIGN_MAX T values[packetSize];
168 uint64_t local_state = m_state + i;
169 for (int j = 0; j < packetSize; ++j) {
170 values[j] = RandomToTypeUniform<T>(&local_state);
171 }
172 m_state = local_state;
173 return internal::pload<Packet>(values);
174 }
175
176 private:
177 mutable uint64_t m_state;
178 };
179
180 template <typename Scalar>
181 struct functor_traits<UniformRandomGenerator<Scalar> > {
182 enum {
183 // Rough estimate for floating point, multiplied by ceil(sizeof(T) / sizeof(float)).
184 Cost = 12 * NumTraits<Scalar>::AddCost *
185 ((sizeof(Scalar) + sizeof(float) - 1) / sizeof(float)),
186 PacketAccess = UniformRandomGenerator<Scalar>::PacketAccess
187 };
188 };
189
190
191
192 template <typename T> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
193 T RandomToTypeNormal(uint64_t* state) {
194 // Use the ratio of uniform method to generate numbers following a normal
195 // distribution. See for example Numerical Recipes chapter 7.3.9 for the
196 // details.
197 T u, v, q;
198 do {
199 u = RandomToTypeUniform<T>(state);
200 v = T(1.7156) * (RandomToTypeUniform<T>(state) - T(0.5));
201 const T x = u - T(0.449871);
202 const T y = numext::abs(v) + T(0.386595);
203 q = x*x + y * (T(0.196)*y - T(0.25472)*x);
204 } while (q > T(0.27597) &&
205 (q > T(0.27846) || v*v > T(-4) * numext::log(u) * u*u));
206
207 return v/u;
208 }
209
210 template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
211 std::complex<float> RandomToTypeNormal<std::complex<float> >(uint64_t* state) {
212 return std::complex<float>(RandomToTypeNormal<float>(state),
213 RandomToTypeNormal<float>(state));
214 }
215 template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
216 std::complex<double> RandomToTypeNormal<std::complex<double> >(uint64_t* state) {
217 return std::complex<double>(RandomToTypeNormal<double>(state),
218 RandomToTypeNormal<double>(state));
219 }
220
221
222 template <typename T> class NormalRandomGenerator {
223 public:
224 static const bool PacketAccess = true;
225
226 // Uses the given "seed" if non-zero, otherwise uses a random seed.
227 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NormalRandomGenerator(uint64_t seed = 0) {
228 m_state = PCG_XSH_RS_state(seed);
229 }
230 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NormalRandomGenerator(
231 const NormalRandomGenerator& other) {
232 m_state = other.m_state;
233 }
234
235 template<typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
236 T operator()(Index i) const {
237 uint64_t local_state = m_state + i;
238 T result = RandomToTypeNormal<T>(&local_state);
239 m_state = local_state;
240 return result;
241 }
242
243 template<typename Packet, typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
244 Packet packetOp(Index i) const {
245 const int packetSize = internal::unpacket_traits<Packet>::size;
246 EIGEN_ALIGN_MAX T values[packetSize];
247 uint64_t local_state = m_state + i;
248 for (int j = 0; j < packetSize; ++j) {
249 values[j] = RandomToTypeNormal<T>(&local_state);
250 }
251 m_state = local_state;
252 return internal::pload<Packet>(values);
253 }
254
255 private:
256 mutable uint64_t m_state;
257 };
258
259
260 template <typename Scalar>
261 struct functor_traits<NormalRandomGenerator<Scalar> > {
262 enum {
263 // On average, we need to generate about 3 random numbers
264 // 15 mul, 8 add, 1.5 logs
265 Cost = 3 * functor_traits<UniformRandomGenerator<Scalar> >::Cost +
266 15 * NumTraits<Scalar>::AddCost + 8 * NumTraits<Scalar>::AddCost +
267 3 * functor_traits<scalar_log_op<Scalar> >::Cost / 2,
268 PacketAccess = NormalRandomGenerator<Scalar>::PacketAccess
269 };
270 };
271
272
273 } // end namespace internal
274 } // end namespace Eigen
275
276 #endif // EIGEN_CXX11_TENSOR_TENSOR_RANDOM_H
277