1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #ifndef TENSORFLOW_CORE_LIB_RANDOM_RANDOM_DISTRIBUTIONS_H_
17 #define TENSORFLOW_CORE_LIB_RANDOM_RANDOM_DISTRIBUTIONS_H_
18
19 #define _USE_MATH_DEFINES
20 #include <math.h>
21 #include <cmath>
22 #undef _USE_MATH_DEFINES
23
24 #include <string.h>
25 #include <algorithm>
26 #include <type_traits>
27
28 #include "philox_random.h"
29 #include "tensorflow/core/lib/bfloat16/bfloat16.h"
30 #include "unsupported/Eigen/CXX11/Tensor"
31
32 namespace tensorflow {
33 namespace random {
34
35 // Helper function to convert a 16-bit integer to a half between [0..1).
36 PHILOX_DEVICE_INLINE Eigen::half Uint16ToHalf(uint16 x);
37 // Helper function to convert a 16-bit integer to a bfloat16 between [0..1).
38 PHILOX_DEVICE_INLINE bfloat16 Uint16ToGfloat16(uint16 x);
39 // Helper function to convert a 32-bit integer to a float between [0..1).
40 PHILOX_DEVICE_INLINE float Uint32ToFloat(uint32 x);
41 // Helper function to convert two 32-bit integers to a double between [0..1).
42 PHILOX_DEVICE_INLINE double Uint64ToDouble(uint32 x0, uint32 x1);
43
44 // Computes a + b. Requires that the result is representable in the destination
45 // type and that b is not maximal (i.e. b + 1 is not 0). Notably, the addend b
46 // need *not* be representable in that type. (The condition on b excludes the
47 // extremal case INT_MIN + UINT_MAX = INT_MAX, which this function cannot
48 // compute.)
49 template <typename Int>
SignedAdd(Int a,typename std::make_unsigned<Int>::type b)50 PHILOX_DEVICE_INLINE Int SignedAdd(Int a, typename std::make_unsigned<Int>::type b) {
51 // Implementation note: both b_div_2 and b - b_div_2 are positive and
52 // representatble as Int.
53 auto b_div_2 = b >> 1;
54 return a + static_cast<Int>(b_div_2) + static_cast<Int>(b - b_div_2);
55 }
56
57 // A class that generates uniform distribution random numbers from the
58 // underlying random integer generator.
59 // Arguments:
60 // Generator: a generator type that returns a number of uint32 upon each
61 // invocation. It needs to define kResultElementCount for the
62 // sample count for each invocation, and ResultType for the
63 // actual returned sample type.
64 // RealType: the data type of the real numbers that will be returned by the
65 // distribution. This could be either float or double for now.
66 // This class is meant to be implemented through specialization. The default
67 // is not defined by design.
68 template <class Generator, typename RealType>
69 class UniformDistribution;
70
71 template <class Generator>
72 class UniformDistribution<Generator, Eigen::half> {
73 public:
74 // The number of elements that will be returned.
75 static const int kResultElementCount = Generator::kResultElementCount;
76 // Cost of generation of a single element (in cycles).
77 static const int kElementCost = 3;
78 // Indicate that this distribution may take variable number of samples
79 // during the runtime.
80 static const bool kVariableSamplesPerOutput = false;
81 typedef Array<Eigen::half, kResultElementCount> ResultType;
82 typedef Eigen::half ResultElementType;
83
84 PHILOX_DEVICE_INLINE
operator()85 ResultType operator()(Generator* gen) {
86 typename Generator::ResultType sample = (*gen)();
87 ResultType result;
88 for (int i = 0; i < kResultElementCount; ++i) {
89 result[i] = Uint16ToHalf(sample[i]); // Truncate the upper 16 bits.
90 }
91 return result;
92 }
93 };
94
95 template <class Generator>
96 class UniformDistribution<Generator, bfloat16> {
97 public:
98 // The number of elements that will be returned.
99 static const int kResultElementCount = Generator::kResultElementCount;
100 // Cost of generation of a single element (in cycles).
101 static const int kElementCost = 3;
102 // Indicate that this distribution may take variable number of samples
103 // during the runtime.
104 static const bool kVariableSamplesPerOutput = false;
105 typedef Array<bfloat16, kResultElementCount> ResultType;
106 typedef bfloat16 ResultElementType;
107
108 PHILOX_DEVICE_INLINE
operator()109 ResultType operator()(Generator* gen) {
110 typename Generator::ResultType sample = (*gen)();
111 ResultType result;
112 for (int i = 0; i < kResultElementCount; ++i) {
113 result[i] = Uint16ToGfloat16(sample[i]);
114 }
115 return result;
116 }
117 };
118
119 template <class Generator>
120 class UniformDistribution<Generator, float> {
121 public:
122 // The number of elements that will be returned.
123 static const int kResultElementCount = Generator::kResultElementCount;
124 // Cost of generation of a single element (in cycles).
125 static const int kElementCost = 3;
126 // Indicate that this distribution may take variable number of samples
127 // during the runtime.
128 static const bool kVariableSamplesPerOutput = false;
129 typedef Array<float, kResultElementCount> ResultType;
130 typedef float ResultElementType;
131
132 PHILOX_DEVICE_INLINE
operator()133 ResultType operator()(Generator* gen) {
134 typename Generator::ResultType sample = (*gen)();
135 ResultType result;
136 for (int i = 0; i < kResultElementCount; ++i) {
137 result[i] = Uint32ToFloat(sample[i]);
138 }
139 return result;
140 }
141 };
142
143 template <class Generator>
144 class UniformDistribution<Generator, double> {
145 public:
146 // The number of elements that will be returned.
147 static const int kResultElementCount = Generator::kResultElementCount / 2;
148 // Cost of generation of a single element (in cycles).
149 static const int kElementCost = 3;
150 // Indicate that this distribution may take variable number of samples
151 // during the runtime.
152 static const bool kVariableSamplesPerOutput = false;
153 typedef Array<double, kResultElementCount> ResultType;
154 typedef double ResultElementType;
155
156 PHILOX_DEVICE_INLINE
operator()157 ResultType operator()(Generator* gen) {
158 typename Generator::ResultType sample = (*gen)();
159 ResultType result;
160 for (int i = 0; i < kResultElementCount; ++i) {
161 result[i] = Uint64ToDouble(sample[2 * i], sample[2 * i + 1]);
162 }
163 return result;
164 }
165 };
166
167 template <class Generator>
168 class UniformDistribution<Generator, int32> {
169 public:
170 // The number of elements that will be returned.
171 static const int kResultElementCount = Generator::kResultElementCount;
172 // Cost of generation of a single element (in cycles).
173 static const int kElementCost = 3;
174 // Indicate that this distribution may take variable number of samples
175 // during the runtime.
176 static const bool kVariableSamplesPerOutput = false;
177 typedef Array<int32, kResultElementCount> ResultType;
178 typedef int32 ResultElementType;
179
180 // Must have lo < hi
UniformDistribution(int32 lo,int32 hi)181 UniformDistribution(int32 lo, int32 hi)
182 : lo_(lo), range_(static_cast<uint32>(hi) - static_cast<uint32>(lo)) {}
183
184 PHILOX_DEVICE_INLINE
operator()185 ResultType operator()(Generator* gen) {
186 typename Generator::ResultType sample = (*gen)();
187 ResultType result;
188 for (int i = 0; i < kResultElementCount; ++i) {
189 result[i] = SignedAdd(lo_, sample[i] % range_);
190 }
191 return result;
192 }
193
194 private:
195 // Note that lo_ is intentionally signed while range_ is intentionally
196 // unsigned. This is because hi - lo can overflow signed integers if
197 // lo < 0 < hi, but always fits in unsigned.
198 int32 lo_;
199 uint32 range_;
200 };
201
202 template <class Generator>
203 class UniformDistribution<Generator, int64> {
204 public:
205 // The number of elements that will be returned.
206 static const int kResultElementCount = Generator::kResultElementCount / 2;
207 // Cost of generation of a single element (in cycles).
208 static const int kElementCost = 3;
209 // Indicate that this distribution may take variable number of samples
210 // during the runtime.
211 static const bool kVariableSamplesPerOutput = false;
212 typedef Array<int64, kResultElementCount> ResultType;
213 typedef int64 ResultElementType;
214
215 // Must have lo < hi
UniformDistribution(int64 lo,int64 hi)216 UniformDistribution(int64 lo, int64 hi)
217 : lo_(lo), range_(static_cast<uint64>(hi) - static_cast<uint64>(lo)) {}
218
219 PHILOX_DEVICE_INLINE
operator()220 ResultType operator()(Generator* gen) {
221 typename Generator::ResultType sample = (*gen)();
222 ResultType result;
223 for (int i = 0; i < kResultElementCount; ++i) {
224 auto bits = sample[2 * i] | static_cast<uint64>(sample[2 * i + 1]) << 32;
225 result[i] = SignedAdd(lo_, bits % range_);
226 }
227 return result;
228 }
229
230 private:
231 // Note that lo_ is intentionally signed while range_ is intentionally
232 // unsigned. This is because hi - lo can overflow signed integers if
233 // lo < 0 < hi, but always fits in unsigned.
234 int64 lo_;
235 uint64 range_;
236 };
237
238 // A class that adapts the underlying native multiple samples to return a single
239 // sample at a time.
240 template <class Generator>
241 class SingleSampleAdapter {
242 public:
243 // The number of elements that will be returned.
244 static const int kResultElementCount = 1;
245 // The number of elements that will be returned by the underlying generator.
246 static const int kNativeElementCount = Generator::kResultElementCount;
247 typedef typename Generator::ResultElementType ResultType;
248 typedef typename Generator::ResultElementType ResultElementType;
249
250 PHILOX_DEVICE_INLINE
SingleSampleAdapter(Generator * gen)251 explicit SingleSampleAdapter(Generator* gen)
252 : generator_(gen), used_result_index_(Generator::kResultElementCount) {}
253
254 PHILOX_DEVICE_INLINE
operator()255 ResultType operator()() {
256 if (used_result_index_ == Generator::kResultElementCount) {
257 unused_results_ = (*generator_)();
258 used_result_index_ = 0;
259 }
260
261 return unused_results_[used_result_index_++];
262 }
263
264 PHILOX_DEVICE_INLINE
Skip(uint64 num_skips)265 void Skip(uint64 num_skips) {
266 if (!num_skips) {
267 return;
268 }
269 int num_unused_results = kNativeElementCount - used_result_index_;
270 if (num_skips <= num_unused_results) {
271 used_result_index_ += num_skips;
272 return;
273 }
274 num_skips -= num_unused_results;
275 used_result_index_ = kNativeElementCount;
276 SkipFromGenerator(num_skips / kNativeElementCount);
277 num_skips = num_skips % kNativeElementCount;
278 if (num_skips) {
279 unused_results_ = (*generator_)();
280 used_result_index_ = num_skips;
281 }
282 }
283
284 private:
285 // This implementation iteratively skips over `num_skips` samples
286 // from `generator_`. There is an O(1) implementation for PhiloxRandom
287 // in random_distributions.cc.
288 PHILOX_DEVICE_INLINE
SkipFromGenerator(uint64 num_skips)289 void SkipFromGenerator(uint64 num_skips) {
290 while (num_skips--) {
291 (*generator_)();
292 }
293 }
294
295 Generator* generator_;
296 typename Generator::ResultType unused_results_;
297 int used_result_index_;
298 };
299
300 // A class that generates unit normal distribution random numbers from the
301 // underlying random integer generator.
302 // Arguments:
303 // Generator: a generator type that returns a number of uint32 upon each
304 // each invocation. It needs to define kResultElementCount for the
305 // sample count for each invocation, and ResultType for actual
306 // returned sample type.
307 // RealType: the data type of the real numbers that will be returned by the
308 // distribution. This could be either float or double for now.
309 // This class is meant to be implemented through specialization. The default
310 // is not defined by design.
311 template <class Generator, typename RealType>
312 class NormalDistribution;
313
314 PHILOX_DEVICE_INLINE
315 void BoxMullerFloat(uint32 x0, uint32 x1, float* f0, float* f1);
316
317 PHILOX_DEVICE_INLINE
318 void BoxMullerDouble(uint32 x0, uint32 x1, uint32 x2, uint32 x3, double* d0, double* d1);
319
320 // Exactly like the float version, except that we convert to half afterwards;
321 // since we don't have half-precision sin/cos even on GPUs, there's nothing to
322 // gain from working in half internally.
323 template <class Generator>
324 class NormalDistribution<Generator, Eigen::half> {
325 public:
326 // The number of elements that will be returned.
327 static const int kResultElementCount = Generator::kResultElementCount;
328 // Cost of generation of a single element (in cycles).
329 static const int kElementCost = 70;
330 // Indicate that this distribution may take variable number of samples
331 // during the runtime.
332 static const bool kVariableSamplesPerOutput = false;
333 typedef Array<Eigen::half, kResultElementCount> ResultType;
334 typedef Eigen::half ResultElementType;
335
336 PHILOX_DEVICE_INLINE
operator()337 ResultType operator()(Generator* gen) {
338 typename Generator::ResultType sample = (*gen)();
339 ResultType result;
340 for (int i = 0; i < kResultElementCount; i += 2) {
341 float f[2];
342 BoxMullerFloat(sample[i], sample[i + 1], &f[0], &f[1]);
343 result[i] = Eigen::half(f[0]);
344 result[i + 1] = Eigen::half(f[1]);
345 }
346 return result;
347 }
348 };
349
350 template <class Generator>
351 class NormalDistribution<Generator, bfloat16> {
352 public:
353 // The number of elements that will be returned.
354 static const int kResultElementCount = Generator::kResultElementCount;
355 // Cost of generation of a single element (in cycles).
356 static const int kElementCost = 70;
357 // Indicate that this distribution may take variable number of samples
358 // during the runtime.
359 static const bool kVariableSamplesPerOutput = false;
360 typedef Array<bfloat16, kResultElementCount> ResultType;
361 typedef bfloat16 ResultElementType;
362
363 PHILOX_DEVICE_INLINE
operator()364 ResultType operator()(Generator* gen) {
365 typename Generator::ResultType sample = (*gen)();
366 ResultType result;
367 static_assert(kResultElementCount % 2 == 0, "kResultElementCount should be an even number");
368 for (int i = 0; i < kResultElementCount; i += 2) {
369 float f[2];
370 // Box-Muller transform requires processing 2 elements at a time.
371 BoxMullerFloat(sample[i], sample[i + 1], &f[0], &f[1]);
372 result[i] = bfloat16(f[0]);
373 result[i + 1] = bfloat16(f[1]);
374 }
375 return result;
376 }
377 };
378
379 template <class Generator>
380 class NormalDistribution<Generator, float> {
381 public:
382 // The number of elements that will be returned.
383 static const int kResultElementCount = Generator::kResultElementCount;
384 // Cost of generation of a single element (in cycles).
385 static const int kElementCost = 70;
386 // Indicate that this distribution may take variable number of samples
387 // during the runtime.
388 static const bool kVariableSamplesPerOutput = false;
389 typedef Array<float, kResultElementCount> ResultType;
390 typedef float ResultElementType;
391
392 PHILOX_DEVICE_INLINE
operator()393 ResultType operator()(Generator* gen) {
394 typename Generator::ResultType sample = (*gen)();
395 ResultType result;
396 for (int i = 0; i < kResultElementCount; i += 2) {
397 BoxMullerFloat(sample[i], sample[i + 1], &result[i], &result[i + 1]);
398 }
399 return result;
400 }
401 };
402
403 template <class Generator>
404 class NormalDistribution<Generator, double> {
405 public:
406 // The number of elements that will be returned.
407 static const int kResultElementCount = Generator::kResultElementCount / 2;
408 // Cost of generation of a single element (in cycles).
409 static const int kElementCost = 70;
410 // Indicate that this distribution may take variable number of samples
411 // during the runtime.
412 static const bool kVariableSamplesPerOutput = false;
413 typedef Array<double, kResultElementCount> ResultType;
414 typedef double ResultElementType;
415
416 PHILOX_DEVICE_INLINE
operator()417 ResultType operator()(Generator* gen) {
418 typename Generator::ResultType sample = (*gen)();
419 ResultType result;
420 for (int i = 0; i < kResultElementCount; i += 2) {
421 const int i2 = 2 * i;
422 BoxMullerDouble(sample[i2], sample[i2 + 1], sample[i2 + 2], sample[i2 + 3], &result[i],
423 &result[i + 1]);
424 }
425 return result;
426 }
427 };
428
429 // A class that returns standard normal distribution between
430 // [-kTruncateValue, kTruncateValue].
431 // Arguments:
432 // Generator: a generator type that returns a number of uint32 upon each
433 // each invocation. It needs to define kResultElementCount for the
434 // sample count for each invocation, and ResultType for actual
435 // returned sample type.
436 // RealType: the data type of the real numbers that will be returned by the
437 // distribution. This could be either float or double for now.
438 // This class is meant to be implemented through specialization. The default
439 // is not defined by design.
440 template <class SingleSampleGenerator, typename RealType>
441 class TruncatedNormalDistribution;
442
443 // Exactly like the float version, except that we convert to half afterwards;
444 // since we don't have half-precision sin/cos even on GPUs, there's nothing to
445 // gain from working in half internally.
446 template <class SingleSampleGenerator>
447 class TruncatedNormalDistribution<SingleSampleGenerator, Eigen::half> {
448 public:
449 // The number of elements that will be returned.
450 static const int kResultElementCount = SingleSampleGenerator::kNativeElementCount;
451 // Cost of generation of a single element (in cycles).
452 static const int kElementCost = 90;
453 // Indicate that this distribution may take variable number of samples
454 // during the runtime.
455 static const bool kVariableSamplesPerOutput = true;
456 // The threshold where the normal distribution is truncated.
457 const float kTruncateValue = 2.0f;
458
459 typedef Array<Eigen::half, kResultElementCount> ResultType;
460 typedef Eigen::half ResultElementType;
461
462 PHILOX_DEVICE_INLINE
operator()463 ResultType operator()(SingleSampleGenerator* gen) {
464 ResultType results;
465 int index = 0;
466 while (true) {
467 // Repeatedly take samples from the normal distribution, until we have
468 // the desired number of elements that fall within the pre-defined cutoff
469 // threshold.
470 const uint32 x0 = (*gen)();
471 const uint32 x1 = (*gen)();
472 float f[2];
473 BoxMullerFloat(x0, x1, &f[0], &f[1]);
474
475 for (int i = 0; i < 2; ++i) {
476 if (Eigen::numext::abs(f[i]) < kTruncateValue) {
477 results[index++] = Eigen::half(f[i]);
478 if (index >= kResultElementCount) {
479 return results;
480 }
481 }
482 }
483 }
484 }
485 };
486
487 template <class SingleSampleGenerator>
488 class TruncatedNormalDistribution<SingleSampleGenerator, bfloat16> {
489 public:
490 // The number of elements that will be returned.
491 static const int kResultElementCount = SingleSampleGenerator::kNativeElementCount;
492 // Cost of generation of a single element (in cycles).
493 static const int kElementCost = 90;
494 // Indicate that this distribution may take variable number of samples
495 // during the runtime.
496 static const bool kVariableSamplesPerOutput = true;
497 // The threshold where the normal distribution is truncated.
498 const float kTruncateValue = 2.0f;
499
500 typedef Array<bfloat16, kResultElementCount> ResultType;
501 typedef bfloat16 ResultElementType;
502
503 PHILOX_DEVICE_INLINE
operator()504 ResultType operator()(SingleSampleGenerator* gen) {
505 ResultType results;
506 int index = 0;
507 while (true) {
508 // Repeatedly take samples from the normal distribution, until we have
509 // the desired number of elements that fall within the pre-defined cutoff
510 // threshold.
511 const uint32 x0 = (*gen)();
512 const uint32 x1 = (*gen)();
513 float f[2];
514 BoxMullerFloat(x0, x1, &f[0], &f[1]);
515
516 for (int i = 0; i < 2; ++i) {
517 if (Eigen::numext::abs(f[i]) < kTruncateValue) {
518 results[index++] = bfloat16(f[i]);
519 if (index >= kResultElementCount) {
520 return results;
521 }
522 }
523 }
524 }
525 }
526 };
527
528 // Partial specialization for float.
529 template <class SingleSampleGenerator>
530 class TruncatedNormalDistribution<SingleSampleGenerator, float> {
531 public:
532 // The number of elements that will be returned.
533 static const int kResultElementCount = SingleSampleGenerator::kNativeElementCount;
534 // Cost of generation of a single element (in cycles).
535 static const int kElementCost = 90;
536 // Indicate that this distribution may take variable number of samples
537 // during the runtime.
538 static const bool kVariableSamplesPerOutput = true;
539 // The threshold where the normal distribution is truncated.
540 const float kTruncateValue = 2.0f;
541
542 typedef Array<float, kResultElementCount> ResultType;
543 typedef float ResultElementType;
544
545 PHILOX_DEVICE_INLINE
operator()546 ResultType operator()(SingleSampleGenerator* gen) {
547 ResultType results;
548 int index = 0;
549 while (true) {
550 // Repeatedly take samples from the normal distribution, until we have
551 // the desired number of elements that fall within the pre-defined cutoff
552 // threshold.
553 const uint32 x0 = (*gen)();
554 const uint32 x1 = (*gen)();
555 float f[2];
556 BoxMullerFloat(x0, x1, &f[0], &f[1]);
557
558 for (int i = 0; i < 2; ++i) {
559 if (Eigen::numext::abs(f[i]) < kTruncateValue) {
560 results[index++] = f[i];
561 if (index >= kResultElementCount) {
562 return results;
563 }
564 }
565 }
566 }
567 }
568 };
569
570 // Partial specialization for double.
571 template <class SingleSampleGenerator>
572 class TruncatedNormalDistribution<SingleSampleGenerator, double> {
573 public:
574 // The number of elements that will be returned.
575 static const int kResultElementCount = (SingleSampleGenerator::kNativeElementCount > 1)
576 ? SingleSampleGenerator::kNativeElementCount / 2
577 : 1;
578 // Cost of generation of a single element (in cycles).
579 static const int kElementCost = 90;
580 // Indicate that this distribution may take variable number of samples
581 // during the runtime.
582 static const bool kVariableSamplesPerOutput = true;
583 typedef Array<double, kResultElementCount> ResultType;
584 typedef double ResultElementType;
585 const double kTruncateValue = 2.0;
586
587 PHILOX_DEVICE_INLINE
operator()588 ResultType operator()(SingleSampleGenerator* gen) {
589 ResultType results;
590 int index = 0;
591 while (1) {
592 const uint32 x0 = (*gen)();
593 const uint32 x1 = (*gen)();
594 const uint32 x2 = (*gen)();
595 const uint32 x3 = (*gen)();
596 double d[2];
597 BoxMullerDouble(x0, x1, x2, x3, &d[0], &d[1]);
598
599 for (int i = 0; i < 2; ++i) {
600 if (Eigen::numext::abs(d[i]) < kTruncateValue) {
601 results[index++] = d[i];
602 if (index >= kResultElementCount) {
603 return results;
604 }
605 }
606 }
607 }
608 }
609 };
610
611 // Helper function to convert two 32-bit uniform integers to two floats
612 // under the unit normal distribution.
613 PHILOX_DEVICE_INLINE
BoxMullerFloat(uint32 x0,uint32 x1,float * f0,float * f1)614 void BoxMullerFloat(uint32 x0, uint32 x1, float* f0, float* f1) {
615 // This function implements the Box-Muller transform:
616 // http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform#Basic_form
617 // Do not send a really small number to log().
618 // We cannot mark "epsilon" as "static const" because NVCC would complain
619 const float epsilon = 1.0e-7f;
620 float u1 = Uint32ToFloat(x0);
621 if (u1 < epsilon) {
622 u1 = epsilon;
623 }
624 const float v1 = 2.0f * M_PI * Uint32ToFloat(x1);
625 const float u2 = Eigen::numext::sqrt(-2.0f * Eigen::numext::log(u1));
626 #if defined(TENSORFLOW_USE_SYCL) || !defined(__linux__)
627 *f0 = Eigen::numext::sin(v1);
628 *f1 = Eigen::numext::cos(v1);
629 #else
630 sincosf(v1, f0, f1);
631 #endif
632 *f0 *= u2;
633 *f1 *= u2;
634 }
635
636 // Helper function to convert four 32-bit uniform integers to two doubles
637 // under the unit normal distribution.
638 PHILOX_DEVICE_INLINE
BoxMullerDouble(uint32 x0,uint32 x1,uint32 x2,uint32 x3,double * d0,double * d1)639 void BoxMullerDouble(uint32 x0, uint32 x1, uint32 x2, uint32 x3, double* d0, double* d1) {
640 // This function implements the Box-Muller transform:
641 // http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform#Basic_form
642 // Do not send a really small number to log().
643 // We cannot mark "epsilon" as "static const" because NVCC would complain
644 const double epsilon = 1.0e-7;
645 double u1 = Uint64ToDouble(x0, x1);
646 if (u1 < epsilon) {
647 u1 = epsilon;
648 }
649 const double v1 = 2 * M_PI * Uint64ToDouble(x2, x3);
650 const double u2 = Eigen::numext::sqrt(-2.0 * Eigen::numext::log(u1));
651 #if defined(TENSORFLOW_USE_SYCL) || !defined(__linux__)
652 *d0 = Eigen::numext::sin(v1);
653 *d1 = Eigen::numext::cos(v1);
654 #else
655 sincos(v1, d0, d1);
656 #endif
657 *d0 *= u2;
658 *d1 *= u2;
659 }
660
661 // Helper function to convert an 16-bit integer to a half between [0..1).
Uint16ToHalf(uint16 x)662 PHILOX_DEVICE_INLINE Eigen::half Uint16ToHalf(uint16 x) {
663 // IEEE754 halfs are formatted as follows (MSB first):
664 // sign(1) exponent(5) mantissa(10)
665 // Conceptually construct the following:
666 // sign == 0
667 // exponent == 15 -- an excess 15 representation of a zero exponent
668 // mantissa == 10 random bits
669 const uint16 man = x & 0x3ffu; // 10 bit mantissa
670 const uint16 exp = static_cast<uint16>(15);
671 const uint16 val = (exp << 10) | man;
672
673 Eigen::half result;
674 result.x = val;
675 return result - Eigen::half(1.0);
676 }
677
678 // Helper function to convert an 16-bit integer to a bfloat16 between [0..1).
679 // This can create a uniform distribution of values between [0..1).
Uint16ToGfloat16(uint16 x)680 PHILOX_DEVICE_INLINE bfloat16 Uint16ToGfloat16(uint16 x) {
681 // bfloat are formatted as follows (MSB first):
682 // sign(1) exponent(8) mantissa(7)
683 // Conceptually construct the following:
684 // sign == 0
685 // exponent == 127 -- an excess 127 representation of a zero exponent
686 // mantissa == 7 random bits
687 const uint16 man = x & 0x7fu; // 7 bit mantissa
688 const uint16 exp = static_cast<uint16>(127);
689 const uint16 val = (exp << 7) | man;
690
691 bfloat16 result;
692 memcpy(&result, &val, sizeof(val));
693 // The mantissa has an implicit leading 1, so the above code creates a value
694 // in [1, 2). The minus will not cause a rounding that makes the result 1.
695 // Instead it will just be close to 1.
696 return result - bfloat16(1.0);
697 }
698
699 // Helper function to convert an 32-bit integer to a float between [0..1).
Uint32ToFloat(uint32 x)700 PHILOX_DEVICE_INLINE float Uint32ToFloat(uint32 x) {
701 // IEEE754 floats are formatted as follows (MSB first):
702 // sign(1) exponent(8) mantissa(23)
703 // Conceptually construct the following:
704 // sign == 0
705 // exponent == 127 -- an excess 127 representation of a zero exponent
706 // mantissa == 23 random bits
707 const uint32 man = x & 0x7fffffu; // 23 bit mantissa
708 const uint32 exp = static_cast<uint32>(127);
709 const uint32 val = (exp << 23) | man;
710
711 // Assumes that endian-ness is same for float and uint32.
712 float result;
713 memcpy(&result, &val, sizeof(val));
714 return result - 1.0f;
715 }
716
717 // Helper function to convert two 32-bit integers to a double between [0..1).
Uint64ToDouble(uint32 x0,uint32 x1)718 PHILOX_DEVICE_INLINE double Uint64ToDouble(uint32 x0, uint32 x1) {
719 // IEEE754 doubles are formatted as follows (MSB first):
720 // sign(1) exponent(11) mantissa(52)
721 // Conceptually construct the following:
722 // sign == 0
723 // exponent == 1023 -- an excess 1023 representation of a zero exponent
724 // mantissa == 52 random bits
725 const uint32 mhi = x0 & 0xfffffu; // upper 20 bits of mantissa
726 const uint32 mlo = x1; // lower 32 bits of mantissa
727 const uint64 man = (static_cast<uint64>(mhi) << 32) | mlo; // mantissa
728 const uint64 exp = static_cast<uint64>(1023);
729 const uint64 val = (exp << 52) | man;
730 // Assumes that endian-ness is same for double and uint64.
731 double result;
732 memcpy(&result, &val, sizeof(val));
733 return result - 1.0;
734 }
735
736 } // namespace random
737 } // namespace tensorflow
738
739 #endif // TENSORFLOW_CORE_LIB_RANDOM_RANDOM_DISTRIBUTIONS_H_
740