• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_CORE_LIB_RANDOM_RANDOM_DISTRIBUTIONS_H_
17 #define TENSORFLOW_CORE_LIB_RANDOM_RANDOM_DISTRIBUTIONS_H_
18 
19 #define _USE_MATH_DEFINES
20 #include <math.h>
21 
22 #include <cmath>
23 #undef _USE_MATH_DEFINES
24 
25 #include <string.h>
26 #include <tensorflow/core/lib/bfloat16/bfloat16.h>
27 
28 #include <algorithm>
29 #include <type_traits>
30 #include <unsupported/Eigen/CXX11/Tensor>
31 
32 #include "philox_random.h"
33 
34 namespace tensorflow {
35 namespace random {
36 
37 // Helper function to convert a 16-bit integer to a half between [0..1).
38 PHILOX_DEVICE_INLINE Eigen::half Uint16ToHalf(uint16 x);
39 // Helper function to convert a 16-bit integer to a bfloat16 between [0..1).
40 PHILOX_DEVICE_INLINE bfloat16 Uint16ToGfloat16(uint16 x);
41 // Helper function to convert a 32-bit integer to a float between [0..1).
42 PHILOX_DEVICE_INLINE float Uint32ToFloat(uint32 x);
43 // Helper function to convert two 32-bit integers to a double between [0..1).
44 PHILOX_DEVICE_INLINE double Uint64ToDouble(uint32 x0, uint32 x1);
45 
46 // Computes a + b. Requires that the result is representable in the destination
47 // type and that b is not maximal (i.e. b + 1 is not 0). Notably, the addend b
48 // need *not* be representable in that type. (The condition on b excludes the
49 // extremal case INT_MIN + UINT_MAX = INT_MAX, which this function cannot
50 // compute.)
51 template <typename Int>
SignedAdd(Int a,typename std::make_unsigned<Int>::type b)52 PHILOX_DEVICE_INLINE Int SignedAdd(Int a, typename std::make_unsigned<Int>::type b) {
53     // Implementation note: both b_div_2 and b - b_div_2 are positive and
54     // representatble as Int.
55     auto b_div_2 = b >> 1;
56     return a + static_cast<Int>(b_div_2) + static_cast<Int>(b - b_div_2);
57 }
58 
59 // A class that generates uniform distribution random numbers from the
60 // underlying random integer generator.
61 // Arguments:
62 //   Generator: a generator type that returns a number of uint32 upon each
63 //              invocation. It needs to define kResultElementCount for the
64 //              sample count for each invocation, and ResultType for the
65 //              actual returned sample type.
66 //   RealType: the data type of the real numbers that will be returned by the
67 //             distribution. This could be either float or double for now.
68 // This class is meant to be implemented through specialization. The default
69 // is not defined by design.
70 template <class Generator, typename RealType>
71 class UniformDistribution;
72 
73 template <class Generator>
74 class UniformDistribution<Generator, Eigen::half> {
75    public:
76     // The number of elements that will be returned.
77     static const int kResultElementCount = Generator::kResultElementCount;
78     // Cost of generation of a single element (in cycles).
79     static const int kElementCost = 3;
80     // Indicate that this distribution may take variable number of samples
81     // during the runtime.
82     static const bool kVariableSamplesPerOutput = false;
83     typedef Array<Eigen::half, kResultElementCount> ResultType;
84     typedef Eigen::half ResultElementType;
85 
86     PHILOX_DEVICE_INLINE
operator()87     ResultType operator()(Generator* gen) {
88         typename Generator::ResultType sample = (*gen)();
89         ResultType result;
90         for (int i = 0; i < kResultElementCount; ++i) {
91             result[i] = Uint16ToHalf(sample[i]);  // Truncate the upper 16 bits.
92         }
93         return result;
94     }
95 };
96 
97 template <class Generator>
98 class UniformDistribution<Generator, bfloat16> {
99    public:
100     // The number of elements that will be returned.
101     static const int kResultElementCount = Generator::kResultElementCount;
102     // Cost of generation of a single element (in cycles).
103     static const int kElementCost = 3;
104     // Indicate that this distribution may take variable number of samples
105     // during the runtime.
106     static const bool kVariableSamplesPerOutput = false;
107     typedef Array<bfloat16, kResultElementCount> ResultType;
108     typedef bfloat16 ResultElementType;
109 
110     PHILOX_DEVICE_INLINE
operator()111     ResultType operator()(Generator* gen) {
112         typename Generator::ResultType sample = (*gen)();
113         ResultType result;
114         for (int i = 0; i < kResultElementCount; ++i) {
115             result[i] = Uint16ToGfloat16(sample[i]);
116         }
117         return result;
118     }
119 };
120 
121 template <class Generator>
122 class UniformDistribution<Generator, float> {
123    public:
124     // The number of elements that will be returned.
125     static const int kResultElementCount = Generator::kResultElementCount;
126     // Cost of generation of a single element (in cycles).
127     static const int kElementCost = 3;
128     // Indicate that this distribution may take variable number of samples
129     // during the runtime.
130     static const bool kVariableSamplesPerOutput = false;
131     typedef Array<float, kResultElementCount> ResultType;
132     typedef float ResultElementType;
133 
134     PHILOX_DEVICE_INLINE
operator()135     ResultType operator()(Generator* gen) {
136         typename Generator::ResultType sample = (*gen)();
137         ResultType result;
138         for (int i = 0; i < kResultElementCount; ++i) {
139             result[i] = Uint32ToFloat(sample[i]);
140         }
141         return result;
142     }
143 };
144 
145 template <class Generator>
146 class UniformDistribution<Generator, double> {
147    public:
148     // The number of elements that will be returned.
149     static const int kResultElementCount = Generator::kResultElementCount / 2;
150     // Cost of generation of a single element (in cycles).
151     static const int kElementCost = 3;
152     // Indicate that this distribution may take variable number of samples
153     // during the runtime.
154     static const bool kVariableSamplesPerOutput = false;
155     typedef Array<double, kResultElementCount> ResultType;
156     typedef double ResultElementType;
157 
158     PHILOX_DEVICE_INLINE
operator()159     ResultType operator()(Generator* gen) {
160         typename Generator::ResultType sample = (*gen)();
161         ResultType result;
162         for (int i = 0; i < kResultElementCount; ++i) {
163             result[i] = Uint64ToDouble(sample[2 * i], sample[2 * i + 1]);
164         }
165         return result;
166     }
167 };
168 
169 template <class Generator>
170 class UniformDistribution<Generator, int32> {
171    public:
172     // The number of elements that will be returned.
173     static const int kResultElementCount = Generator::kResultElementCount;
174     // Cost of generation of a single element (in cycles).
175     static const int kElementCost = 3;
176     // Indicate that this distribution may take variable number of samples
177     // during the runtime.
178     static const bool kVariableSamplesPerOutput = false;
179     typedef Array<int32, kResultElementCount> ResultType;
180     typedef int32 ResultElementType;
181 
182     // Must have lo < hi
UniformDistribution(int32 lo,int32 hi)183     UniformDistribution(int32 lo, int32 hi)
184         : lo_(lo), range_(static_cast<uint32>(hi) - static_cast<uint32>(lo)) {}
185 
186     PHILOX_DEVICE_INLINE
operator()187     ResultType operator()(Generator* gen) {
188         typename Generator::ResultType sample = (*gen)();
189         ResultType result;
190         for (int i = 0; i < kResultElementCount; ++i) {
191             result[i] = SignedAdd(lo_, sample[i] % range_);
192         }
193         return result;
194     }
195 
196    private:
197     // Note that lo_ is intentionally signed while range_ is intentionally
198     // unsigned.  This is because hi - lo can overflow signed integers if
199     // lo < 0 < hi, but always fits in unsigned.
200     int32 lo_;
201     uint32 range_;
202 };
203 
204 template <class Generator>
205 class UniformDistribution<Generator, int64> {
206    public:
207     // The number of elements that will be returned.
208     static const int kResultElementCount = Generator::kResultElementCount / 2;
209     // Cost of generation of a single element (in cycles).
210     static const int kElementCost = 3;
211     // Indicate that this distribution may take variable number of samples
212     // during the runtime.
213     static const bool kVariableSamplesPerOutput = false;
214     typedef Array<int64, kResultElementCount> ResultType;
215     typedef int64 ResultElementType;
216 
217     // Must have lo < hi
UniformDistribution(int64 lo,int64 hi)218     UniformDistribution(int64 lo, int64 hi)
219         : lo_(lo), range_(static_cast<uint64>(hi) - static_cast<uint64>(lo)) {}
220 
221     PHILOX_DEVICE_INLINE
operator()222     ResultType operator()(Generator* gen) {
223         typename Generator::ResultType sample = (*gen)();
224         ResultType result;
225         for (int i = 0; i < kResultElementCount; ++i) {
226             auto bits = sample[2 * i] | static_cast<uint64>(sample[2 * i + 1]) << 32;
227             result[i] = SignedAdd(lo_, bits % range_);
228         }
229         return result;
230     }
231 
232    private:
233     // Note that lo_ is intentionally signed while range_ is intentionally
234     // unsigned.  This is because hi - lo can overflow signed integers if
235     // lo < 0 < hi, but always fits in unsigned.
236     int64 lo_;
237     uint64 range_;
238 };
239 
240 // A class that adapts the underlying native multiple samples to return a single
241 // sample at a time.
242 template <class Generator>
243 class SingleSampleAdapter {
244    public:
245     // The number of elements that will be returned.
246     static const int kResultElementCount = 1;
247     // The number of elements that will be returned by the underlying generator.
248     static const int kNativeElementCount = Generator::kResultElementCount;
249     typedef typename Generator::ResultElementType ResultType;
250     typedef typename Generator::ResultElementType ResultElementType;
251 
252     PHILOX_DEVICE_INLINE
SingleSampleAdapter(Generator * gen)253     explicit SingleSampleAdapter(Generator* gen)
254         : generator_(gen), used_result_index_(Generator::kResultElementCount) {}
255 
256     PHILOX_DEVICE_INLINE
operator()257     ResultType operator()() {
258         if (used_result_index_ == Generator::kResultElementCount) {
259             unused_results_ = (*generator_)();
260             used_result_index_ = 0;
261         }
262 
263         return unused_results_[used_result_index_++];
264     }
265 
266     PHILOX_DEVICE_INLINE
Skip(uint64 num_skips)267     void Skip(uint64 num_skips) {
268         if (!num_skips) {
269             return;
270         }
271         int num_unused_results = kNativeElementCount - used_result_index_;
272         if (num_skips <= num_unused_results) {
273             used_result_index_ += num_skips;
274             return;
275         }
276         num_skips -= num_unused_results;
277         used_result_index_ = kNativeElementCount;
278         SkipFromGenerator(num_skips / kNativeElementCount);
279         num_skips = num_skips % kNativeElementCount;
280         if (num_skips) {
281             unused_results_ = (*generator_)();
282             used_result_index_ = num_skips;
283         }
284     }
285 
286    private:
287     // This implementation iteratively skips over `num_skips` samples
288     // from `generator_`. There is an O(1) implementation for PhiloxRandom
289     // in random_distributions.cc.
290     PHILOX_DEVICE_INLINE
SkipFromGenerator(uint64 num_skips)291     void SkipFromGenerator(uint64 num_skips) {
292         while (num_skips--) {
293             (*generator_)();
294         }
295     }
296 
297     Generator* generator_;
298     typename Generator::ResultType unused_results_;
299     int used_result_index_;
300 };
301 
302 // A class that generates unit normal distribution random numbers from the
303 // underlying random integer generator.
304 // Arguments:
305 //   Generator: a generator type that returns a number of uint32 upon each
306 //              each invocation. It needs to define kResultElementCount for the
307 //              sample count for each invocation, and ResultType for actual
308 //              returned sample type.
309 //   RealType: the data type of the real numbers that will be returned by the
310 //             distribution. This could be either float or double for now.
311 // This class is meant to be implemented through specialization. The default
312 // is not defined by design.
313 template <class Generator, typename RealType>
314 class NormalDistribution;
315 
316 PHILOX_DEVICE_INLINE
317 void BoxMullerFloat(uint32 x0, uint32 x1, float* f0, float* f1);
318 
319 PHILOX_DEVICE_INLINE
320 void BoxMullerDouble(uint32 x0, uint32 x1, uint32 x2, uint32 x3, double* d0, double* d1);
321 
322 // Exactly like the float version, except that we convert to half afterwards;
323 // since we don't have half-precision sin/cos even on GPUs, there's nothing to
324 // gain from working in half internally.
325 template <class Generator>
326 class NormalDistribution<Generator, Eigen::half> {
327    public:
328     // The number of elements that will be returned.
329     static const int kResultElementCount = Generator::kResultElementCount;
330     // Cost of generation of a single element (in cycles).
331     static const int kElementCost = 70;
332     // Indicate that this distribution may take variable number of samples
333     // during the runtime.
334     static const bool kVariableSamplesPerOutput = false;
335     typedef Array<Eigen::half, kResultElementCount> ResultType;
336     typedef Eigen::half ResultElementType;
337 
338     PHILOX_DEVICE_INLINE
operator()339     ResultType operator()(Generator* gen) {
340         typename Generator::ResultType sample = (*gen)();
341         ResultType result;
342         for (int i = 0; i < kResultElementCount; i += 2) {
343             float f[2];
344             BoxMullerFloat(sample[i], sample[i + 1], &f[0], &f[1]);
345             result[i] = Eigen::half(f[0]);
346             result[i + 1] = Eigen::half(f[1]);
347         }
348         return result;
349     }
350 };
351 
352 template <class Generator>
353 class NormalDistribution<Generator, bfloat16> {
354    public:
355     // The number of elements that will be returned.
356     static const int kResultElementCount = Generator::kResultElementCount;
357     // Cost of generation of a single element (in cycles).
358     static const int kElementCost = 70;
359     // Indicate that this distribution may take variable number of samples
360     // during the runtime.
361     static const bool kVariableSamplesPerOutput = false;
362     typedef Array<bfloat16, kResultElementCount> ResultType;
363     typedef bfloat16 ResultElementType;
364 
365     PHILOX_DEVICE_INLINE
operator()366     ResultType operator()(Generator* gen) {
367         typename Generator::ResultType sample = (*gen)();
368         ResultType result;
369         static_assert(kResultElementCount % 2 == 0, "kResultElementCount should be an even number");
370         for (int i = 0; i < kResultElementCount; i += 2) {
371             float f[2];
372             // Box-Muller transform requires processing 2 elements at a time.
373             BoxMullerFloat(sample[i], sample[i + 1], &f[0], &f[1]);
374             result[i] = bfloat16(f[0]);
375             result[i + 1] = bfloat16(f[1]);
376         }
377         return result;
378     }
379 };
380 
381 template <class Generator>
382 class NormalDistribution<Generator, float> {
383    public:
384     // The number of elements that will be returned.
385     static const int kResultElementCount = Generator::kResultElementCount;
386     // Cost of generation of a single element (in cycles).
387     static const int kElementCost = 70;
388     // Indicate that this distribution may take variable number of samples
389     // during the runtime.
390     static const bool kVariableSamplesPerOutput = false;
391     typedef Array<float, kResultElementCount> ResultType;
392     typedef float ResultElementType;
393 
394     PHILOX_DEVICE_INLINE
operator()395     ResultType operator()(Generator* gen) {
396         typename Generator::ResultType sample = (*gen)();
397         ResultType result;
398         for (int i = 0; i < kResultElementCount; i += 2) {
399             BoxMullerFloat(sample[i], sample[i + 1], &result[i], &result[i + 1]);
400         }
401         return result;
402     }
403 };
404 
405 template <class Generator>
406 class NormalDistribution<Generator, double> {
407    public:
408     // The number of elements that will be returned.
409     static const int kResultElementCount = Generator::kResultElementCount / 2;
410     // Cost of generation of a single element (in cycles).
411     static const int kElementCost = 70;
412     // Indicate that this distribution may take variable number of samples
413     // during the runtime.
414     static const bool kVariableSamplesPerOutput = false;
415     typedef Array<double, kResultElementCount> ResultType;
416     typedef double ResultElementType;
417 
418     PHILOX_DEVICE_INLINE
operator()419     ResultType operator()(Generator* gen) {
420         typename Generator::ResultType sample = (*gen)();
421         ResultType result;
422         for (int i = 0; i < kResultElementCount; i += 2) {
423             const int i2 = 2 * i;
424             BoxMullerDouble(sample[i2], sample[i2 + 1], sample[i2 + 2], sample[i2 + 3], &result[i],
425                             &result[i + 1]);
426         }
427         return result;
428     }
429 };
430 
431 // A class that returns standard normal distribution between
432 // [-kTruncateValue, kTruncateValue].
433 // Arguments:
434 //   Generator: a generator type that returns a number of uint32 upon each
435 //              each invocation. It needs to define kResultElementCount for the
436 //              sample count for each invocation, and ResultType for actual
437 //              returned sample type.
438 //   RealType: the data type of the real numbers that will be returned by the
439 //             distribution. This could be either float or double for now.
440 // This class is meant to be implemented through specialization. The default
441 // is not defined by design.
442 template <class SingleSampleGenerator, typename RealType>
443 class TruncatedNormalDistribution;
444 
445 // Exactly like the float version, except that we convert to half afterwards;
446 // since we don't have half-precision sin/cos even on GPUs, there's nothing to
447 // gain from working in half internally.
448 template <class SingleSampleGenerator>
449 class TruncatedNormalDistribution<SingleSampleGenerator, Eigen::half> {
450    public:
451     // The number of elements that will be returned.
452     static const int kResultElementCount = SingleSampleGenerator::kNativeElementCount;
453     // Cost of generation of a single element (in cycles).
454     static const int kElementCost = 90;
455     // Indicate that this distribution may take variable number of samples
456     // during the runtime.
457     static const bool kVariableSamplesPerOutput = true;
458     // The threshold where the normal distribution is truncated.
459     const float kTruncateValue = 2.0f;
460 
461     typedef Array<Eigen::half, kResultElementCount> ResultType;
462     typedef Eigen::half ResultElementType;
463 
464     PHILOX_DEVICE_INLINE
operator()465     ResultType operator()(SingleSampleGenerator* gen) {
466         ResultType results;
467         int index = 0;
468         while (true) {
469             // Repeatedly take samples from the normal distribution, until we have
470             // the desired number of elements that fall within the pre-defined cutoff
471             // threshold.
472             const uint32 x0 = (*gen)();
473             const uint32 x1 = (*gen)();
474             float f[2];
475             BoxMullerFloat(x0, x1, &f[0], &f[1]);
476 
477             for (int i = 0; i < 2; ++i) {
478                 if (Eigen::numext::abs(f[i]) < kTruncateValue) {
479                     results[index++] = Eigen::half(f[i]);
480                     if (index >= kResultElementCount) {
481                         return results;
482                     }
483                 }
484             }
485         }
486     }
487 };
488 
489 template <class SingleSampleGenerator>
490 class TruncatedNormalDistribution<SingleSampleGenerator, bfloat16> {
491    public:
492     // The number of elements that will be returned.
493     static const int kResultElementCount = SingleSampleGenerator::kNativeElementCount;
494     // Cost of generation of a single element (in cycles).
495     static const int kElementCost = 90;
496     // Indicate that this distribution may take variable number of samples
497     // during the runtime.
498     static const bool kVariableSamplesPerOutput = true;
499     // The threshold where the normal distribution is truncated.
500     const float kTruncateValue = 2.0f;
501 
502     typedef Array<bfloat16, kResultElementCount> ResultType;
503     typedef bfloat16 ResultElementType;
504 
505     PHILOX_DEVICE_INLINE
operator()506     ResultType operator()(SingleSampleGenerator* gen) {
507         ResultType results;
508         int index = 0;
509         while (true) {
510             // Repeatedly take samples from the normal distribution, until we have
511             // the desired number of elements that fall within the pre-defined cutoff
512             // threshold.
513             const uint32 x0 = (*gen)();
514             const uint32 x1 = (*gen)();
515             float f[2];
516             BoxMullerFloat(x0, x1, &f[0], &f[1]);
517 
518             for (int i = 0; i < 2; ++i) {
519                 if (Eigen::numext::abs(f[i]) < kTruncateValue) {
520                     results[index++] = bfloat16(f[i]);
521                     if (index >= kResultElementCount) {
522                         return results;
523                     }
524                 }
525             }
526         }
527     }
528 };
529 
530 // Partial specialization for float.
531 template <class SingleSampleGenerator>
532 class TruncatedNormalDistribution<SingleSampleGenerator, float> {
533    public:
534     // The number of elements that will be returned.
535     static const int kResultElementCount = SingleSampleGenerator::kNativeElementCount;
536     // Cost of generation of a single element (in cycles).
537     static const int kElementCost = 90;
538     // Indicate that this distribution may take variable number of samples
539     // during the runtime.
540     static const bool kVariableSamplesPerOutput = true;
541     // The threshold where the normal distribution is truncated.
542     const float kTruncateValue = 2.0f;
543 
544     typedef Array<float, kResultElementCount> ResultType;
545     typedef float ResultElementType;
546 
547     PHILOX_DEVICE_INLINE
operator()548     ResultType operator()(SingleSampleGenerator* gen) {
549         ResultType results;
550         int index = 0;
551         while (true) {
552             // Repeatedly take samples from the normal distribution, until we have
553             // the desired number of elements that fall within the pre-defined cutoff
554             // threshold.
555             const uint32 x0 = (*gen)();
556             const uint32 x1 = (*gen)();
557             float f[2];
558             BoxMullerFloat(x0, x1, &f[0], &f[1]);
559 
560             for (int i = 0; i < 2; ++i) {
561                 if (Eigen::numext::abs(f[i]) < kTruncateValue) {
562                     results[index++] = f[i];
563                     if (index >= kResultElementCount) {
564                         return results;
565                     }
566                 }
567             }
568         }
569     }
570 };
571 
572 // Partial specialization for double.
573 template <class SingleSampleGenerator>
574 class TruncatedNormalDistribution<SingleSampleGenerator, double> {
575    public:
576     // The number of elements that will be returned.
577     static const int kResultElementCount = (SingleSampleGenerator::kNativeElementCount > 1)
578                                                    ? SingleSampleGenerator::kNativeElementCount / 2
579                                                    : 1;
580     // Cost of generation of a single element (in cycles).
581     static const int kElementCost = 90;
582     // Indicate that this distribution may take variable number of samples
583     // during the runtime.
584     static const bool kVariableSamplesPerOutput = true;
585     typedef Array<double, kResultElementCount> ResultType;
586     typedef double ResultElementType;
587     const double kTruncateValue = 2.0;
588 
589     PHILOX_DEVICE_INLINE
operator()590     ResultType operator()(SingleSampleGenerator* gen) {
591         ResultType results;
592         int index = 0;
593         while (1) {
594             const uint32 x0 = (*gen)();
595             const uint32 x1 = (*gen)();
596             const uint32 x2 = (*gen)();
597             const uint32 x3 = (*gen)();
598             double d[2];
599             BoxMullerDouble(x0, x1, x2, x3, &d[0], &d[1]);
600 
601             for (int i = 0; i < 2; ++i) {
602                 if (Eigen::numext::abs(d[i]) < kTruncateValue) {
603                     results[index++] = d[i];
604                     if (index >= kResultElementCount) {
605                         return results;
606                     }
607                 }
608             }
609         }
610     }
611 };
612 
613 // Helper function to convert two 32-bit uniform integers to two floats
614 // under the unit normal distribution.
615 PHILOX_DEVICE_INLINE
BoxMullerFloat(uint32 x0,uint32 x1,float * f0,float * f1)616 void BoxMullerFloat(uint32 x0, uint32 x1, float* f0, float* f1) {
617     // This function implements the Box-Muller transform:
618     // http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform#Basic_form
619     // Do not send a really small number to log().
620     // We cannot mark "epsilon" as "static const" because NVCC would complain
621     const float epsilon = 1.0e-7f;
622     float u1 = Uint32ToFloat(x0);
623     if (u1 < epsilon) {
624         u1 = epsilon;
625     }
626     const float v1 = 2.0f * M_PI * Uint32ToFloat(x1);
627     const float u2 = Eigen::numext::sqrt(-2.0f * Eigen::numext::log(u1));
628 #if defined(TENSORFLOW_USE_SYCL) || !defined(__linux__)
629     *f0 = Eigen::numext::sin(v1);
630     *f1 = Eigen::numext::cos(v1);
631 #else
632     sincosf(v1, f0, f1);
633 #endif
634     *f0 *= u2;
635     *f1 *= u2;
636 }
637 
638 // Helper function to convert four 32-bit uniform integers to two doubles
639 // under the unit normal distribution.
640 PHILOX_DEVICE_INLINE
BoxMullerDouble(uint32 x0,uint32 x1,uint32 x2,uint32 x3,double * d0,double * d1)641 void BoxMullerDouble(uint32 x0, uint32 x1, uint32 x2, uint32 x3, double* d0, double* d1) {
642     // This function implements the Box-Muller transform:
643     // http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform#Basic_form
644     // Do not send a really small number to log().
645     // We cannot mark "epsilon" as "static const" because NVCC would complain
646     const double epsilon = 1.0e-7;
647     double u1 = Uint64ToDouble(x0, x1);
648     if (u1 < epsilon) {
649         u1 = epsilon;
650     }
651     const double v1 = 2 * M_PI * Uint64ToDouble(x2, x3);
652     const double u2 = Eigen::numext::sqrt(-2.0 * Eigen::numext::log(u1));
653 #if defined(TENSORFLOW_USE_SYCL) || !defined(__linux__)
654     *d0 = Eigen::numext::sin(v1);
655     *d1 = Eigen::numext::cos(v1);
656 #else
657     sincos(v1, d0, d1);
658 #endif
659     *d0 *= u2;
660     *d1 *= u2;
661 }
662 
663 // Helper function to convert an 16-bit integer to a half between [0..1).
Uint16ToHalf(uint16 x)664 PHILOX_DEVICE_INLINE Eigen::half Uint16ToHalf(uint16 x) {
665     // IEEE754 halfs are formatted as follows (MSB first):
666     //    sign(1) exponent(5) mantissa(10)
667     // Conceptually construct the following:
668     //    sign == 0
669     //    exponent == 15  -- an excess 15 representation of a zero exponent
670     //    mantissa == 10 random bits
671     const uint16 man = x & 0x3ffu;  // 10 bit mantissa
672     const uint16 exp = static_cast<uint16>(15);
673     const uint16 val = (exp << 10) | man;
674 
675     Eigen::half result;
676     result.x = val;
677     return result - Eigen::half(1.0);
678 }
679 
680 // Helper function to convert an 16-bit integer to a bfloat16 between [0..1).
681 // This can create a uniform distribution of values between [0..1).
Uint16ToGfloat16(uint16 x)682 PHILOX_DEVICE_INLINE bfloat16 Uint16ToGfloat16(uint16 x) {
683     // bfloat are formatted as follows (MSB first):
684     //    sign(1) exponent(8) mantissa(7)
685     // Conceptually construct the following:
686     //    sign == 0
687     //    exponent == 127  -- an excess 127 representation of a zero exponent
688     //    mantissa == 7 random bits
689     const uint16 man = x & 0x7fu;  // 7 bit mantissa
690     const uint16 exp = static_cast<uint16>(127);
691     const uint16 val = (exp << 7) | man;
692 
693     bfloat16 result;
694     memcpy(&result, &val, sizeof(val));
695     // The mantissa has an implicit leading 1, so the above code creates a value
696     // in [1, 2). The minus will not cause a rounding that makes the result 1.
697     // Instead it will just be close to 1.
698     return result - bfloat16(1.0);
699 }
700 
701 // Helper function to convert an 32-bit integer to a float between [0..1).
Uint32ToFloat(uint32 x)702 PHILOX_DEVICE_INLINE float Uint32ToFloat(uint32 x) {
703     // IEEE754 floats are formatted as follows (MSB first):
704     //    sign(1) exponent(8) mantissa(23)
705     // Conceptually construct the following:
706     //    sign == 0
707     //    exponent == 127  -- an excess 127 representation of a zero exponent
708     //    mantissa == 23 random bits
709     const uint32 man = x & 0x7fffffu;  // 23 bit mantissa
710     const uint32 exp = static_cast<uint32>(127);
711     const uint32 val = (exp << 23) | man;
712 
713     // Assumes that endian-ness is same for float and uint32.
714     float result;
715     memcpy(&result, &val, sizeof(val));
716     return result - 1.0f;
717 }
718 
719 // Helper function to convert two 32-bit integers to a double between [0..1).
Uint64ToDouble(uint32 x0,uint32 x1)720 PHILOX_DEVICE_INLINE double Uint64ToDouble(uint32 x0, uint32 x1) {
721     // IEEE754 doubles are formatted as follows (MSB first):
722     //    sign(1) exponent(11) mantissa(52)
723     // Conceptually construct the following:
724     //    sign == 0
725     //    exponent == 1023  -- an excess 1023 representation of a zero exponent
726     //    mantissa == 52 random bits
727     const uint32 mhi = x0 & 0xfffffu;                           // upper 20 bits of mantissa
728     const uint32 mlo = x1;                                      // lower 32 bits of mantissa
729     const uint64 man = (static_cast<uint64>(mhi) << 32) | mlo;  // mantissa
730     const uint64 exp = static_cast<uint64>(1023);
731     const uint64 val = (exp << 52) | man;
732     // Assumes that endian-ness is same for double and uint64.
733     double result;
734     memcpy(&result, &val, sizeof(val));
735     return result - 1.0;
736 }
737 
738 }  // namespace random
739 }  // namespace tensorflow
740 
741 #endif  // TENSORFLOW_CORE_LIB_RANDOM_RANDOM_DISTRIBUTIONS_H_
742