• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_LIB_RANDOM_RANDOM_DISTRIBUTIONS_H_
17 #define TENSORFLOW_LIB_RANDOM_RANDOM_DISTRIBUTIONS_H_
18 
19 #define _USE_MATH_DEFINES
20 #include <math.h>
21 #include <cmath>
22 #undef _USE_MATH_DEFINES
23 
24 #include <string.h>
25 #include <algorithm>
26 
27 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
28 #include "tensorflow/core/lib/random/philox_random.h"
29 
30 namespace tensorflow {
31 namespace random {
32 
33 // Helper function to convert a 16-bit integer to a half between [0..1).
34 PHILOX_DEVICE_INLINE Eigen::half Uint16ToHalf(uint16 x);
35 // Helper function to convert a 32-bit integer to a float between [0..1).
36 PHILOX_DEVICE_INLINE float Uint32ToFloat(uint32 x);
37 // Helper function to convert two 32-bit integers to a double between [0..1).
38 PHILOX_DEVICE_INLINE double Uint64ToDouble(uint32 x0, uint32 x1);
39 
40 // A class that generates uniform distribution random numbers from the
41 // underlying random integer generator.
42 // Arguments:
43 //   Generator: a generator type that returns a number of uint32 upon each
44 //              invocation. It needs to define kResultElementCount for the
45 //              sample count for each invocation, and ResultType for the
46 //              actual returned sample type.
47 //   RealType: the data type of the real numbers that will be returned by the
48 //             distribution. This could be either float or double for now.
49 // This class is meant to be implemented through specialization. The default
50 // is not defined by design.
51 template <class Generator, typename RealType>
52 class UniformDistribution;
53 
54 template <class Generator>
55 class UniformDistribution<Generator, Eigen::half> {
56  public:
57   // The number of elements that will be returned.
58   static const int kResultElementCount = Generator::kResultElementCount;
59   // Cost of generation of a single element (in cycles).
60   static const int kElementCost = 3;
61   // Indicate that this distribution may take variable number of samples
62   // during the runtime.
63   static const bool kVariableSamplesPerOutput = false;
64   typedef Array<Eigen::half, kResultElementCount> ResultType;
65   typedef Eigen::half ResultElementType;
66 
67   PHILOX_DEVICE_INLINE
operator()68   ResultType operator()(Generator* gen) {
69     typename Generator::ResultType sample = (*gen)();
70     ResultType result;
71     for (int i = 0; i < kResultElementCount; ++i) {
72       result[i] = Uint16ToHalf(sample[i]);  // Truncate the upper 16 bits.
73     }
74     return result;
75   }
76 };
77 
78 template <class Generator>
79 class UniformDistribution<Generator, float> {
80  public:
81   // The number of elements that will be returned.
82   static const int kResultElementCount = Generator::kResultElementCount;
83   // Cost of generation of a single element (in cycles).
84   static const int kElementCost = 3;
85   // Indicate that this distribution may take variable number of samples
86   // during the runtime.
87   static const bool kVariableSamplesPerOutput = false;
88   typedef Array<float, kResultElementCount> ResultType;
89   typedef float ResultElementType;
90 
91   PHILOX_DEVICE_INLINE
operator()92   ResultType operator()(Generator* gen) {
93     typename Generator::ResultType sample = (*gen)();
94     ResultType result;
95     for (int i = 0; i < kResultElementCount; ++i) {
96       result[i] = Uint32ToFloat(sample[i]);
97     }
98     return result;
99   }
100 };
101 
102 template <class Generator>
103 class UniformDistribution<Generator, double> {
104  public:
105   // The number of elements that will be returned.
106   static const int kResultElementCount = Generator::kResultElementCount / 2;
107   // Cost of generation of a single element (in cycles).
108   static const int kElementCost = 3;
109   // Indicate that this distribution may take variable number of samples
110   // during the runtime.
111   static const bool kVariableSamplesPerOutput = false;
112   typedef Array<double, kResultElementCount> ResultType;
113   typedef double ResultElementType;
114 
115   PHILOX_DEVICE_INLINE
operator()116   ResultType operator()(Generator* gen) {
117     typename Generator::ResultType sample = (*gen)();
118     ResultType result;
119     for (int i = 0; i < kResultElementCount; ++i) {
120       result[i] = Uint64ToDouble(sample[2 * i], sample[2 * i + 1]);
121     }
122     return result;
123   }
124 };
125 
126 template <class Generator>
127 class UniformDistribution<Generator, int32> {
128  public:
129   // The number of elements that will be returned.
130   static const int kResultElementCount = Generator::kResultElementCount;
131   // Cost of generation of a single element (in cycles).
132   static const int kElementCost = 3;
133   // Indicate that this distribution may take variable number of samples
134   // during the runtime.
135   static const bool kVariableSamplesPerOutput = false;
136   typedef Array<int32, kResultElementCount> ResultType;
137   typedef int32 ResultElementType;
138 
139   // Must have lo < hi
UniformDistribution(int32 lo,int32 hi)140   UniformDistribution(int32 lo, int32 hi) : lo_(lo), range_(hi - lo) {}
141 
142   PHILOX_DEVICE_INLINE
operator()143   ResultType operator()(Generator* gen) {
144     typename Generator::ResultType sample = (*gen)();
145     ResultType result;
146     for (int i = 0; i < kResultElementCount; ++i) {
147       result[i] = lo_ + static_cast<int32>(sample[i] % range_);
148     }
149     return result;
150   }
151 
152  private:
153   // Note that lo_ is intentionally signed while range_ is intentionally
154   // unsigned.  This is because hi - lo can overflow signed integers if
155   // lo < 0 < hi, but always fits in unsigned.
156   int32 lo_;
157   uint32 range_;
158 };
159 
160 template <class Generator>
161 class UniformDistribution<Generator, int64> {
162  public:
163   // The number of elements that will be returned.
164   static const int kResultElementCount = Generator::kResultElementCount / 2;
165   // Cost of generation of a single element (in cycles).
166   static const int kElementCost = 3;
167   // Indicate that this distribution may take variable number of samples
168   // during the runtime.
169   static const bool kVariableSamplesPerOutput = false;
170   typedef Array<int64, kResultElementCount> ResultType;
171   typedef int64 ResultElementType;
172 
173   // Must have lo < hi
UniformDistribution(int64 lo,int64 hi)174   UniformDistribution(int64 lo, int64 hi) : lo_(lo), range_(hi - lo) {}
175 
176   PHILOX_DEVICE_INLINE
operator()177   ResultType operator()(Generator* gen) {
178     typename Generator::ResultType sample = (*gen)();
179     ResultType result;
180     for (int i = 0; i < kResultElementCount; ++i) {
181       auto bits = sample[2 * i] | static_cast<uint64>(sample[2 * i + 1]) << 32;
182       result[i] = lo_ + static_cast<int64>(bits % range_);
183     }
184     return result;
185   }
186 
187  private:
188   // Note that lo_ is intentionally signed while range_ is intentionally
189   // unsigned.  This is because hi - lo can overflow signed integers if
190   // lo < 0 < hi, but always fits in unsigned.
191   int64 lo_;
192   uint64 range_;
193 };
194 
195 // A class that adapts the underlying native multiple samples to return a single
196 // sample at a time.
197 template <class Generator>
198 class SingleSampleAdapter {
199  public:
200   // The number of elements that will be returned.
201   static const int kResultElementCount = 1;
202   // The number of elements that will be returned by the underlying generator.
203   static const int kNativeElementCount = Generator::kResultElementCount;
204   typedef typename Generator::ResultElementType ResultType;
205   typedef typename Generator::ResultElementType ResultElementType;
206 
207   PHILOX_DEVICE_INLINE
SingleSampleAdapter(Generator * gen)208   explicit SingleSampleAdapter(Generator* gen)
209       : generator_(gen), used_result_index_(Generator::kResultElementCount) {}
210 
211   PHILOX_DEVICE_INLINE
operator()212   ResultType operator()() {
213     if (used_result_index_ == Generator::kResultElementCount) {
214       unused_results_ = (*generator_)();
215       used_result_index_ = 0;
216     }
217 
218     return unused_results_[used_result_index_++];
219   }
220 
221   PHILOX_DEVICE_INLINE
Skip(uint64 num_skips)222   void Skip(uint64 num_skips) {
223     if (!num_skips) {
224       return;
225     }
226     int num_unused_results = kNativeElementCount - used_result_index_;
227     if (num_skips <= num_unused_results) {
228       used_result_index_ += num_skips;
229       return;
230     }
231     num_skips -= num_unused_results;
232     used_result_index_ = kNativeElementCount;
233     SkipFromGenerator(num_skips / kNativeElementCount);
234     num_skips = num_skips % kNativeElementCount;
235     if (num_skips) {
236       unused_results_ = (*generator_)();
237       used_result_index_ = num_skips;
238     }
239   }
240 
241  private:
242   // This implementation iteratively skips over `num_skips` samples
243   // from `generator_`. There is an O(1) implementation for PhiloxRandom
244   // in random_distributions.cc.
245   PHILOX_DEVICE_INLINE
SkipFromGenerator(uint64 num_skips)246   void SkipFromGenerator(uint64 num_skips) {
247     while (num_skips--) {
248       (*generator_)();
249     }
250   }
251 
252   Generator* generator_;
253   typename Generator::ResultType unused_results_;
254   int used_result_index_;
255 };
256 
257 // A class that generates unit normal distribution random numbers from the
258 // underlying random integer generator.
259 // Arguments:
260 //   Generator: a generator type that returns a number of uint32 upon each
261 //              each invocation. It needs to define kResultElementCount for the
262 //              sample count for each invocation, and ResultType for actual
263 //              returned sample type.
264 //   RealType: the data type of the real numbers that will be returned by the
265 //             distribution. This could be either float or double for now.
266 // This class is meant to be implemented through specialization. The default
267 // is not defined by design.
268 template <class Generator, typename RealType>
269 class NormalDistribution;
270 
271 PHILOX_DEVICE_INLINE
272 void BoxMullerFloat(uint32 x0, uint32 x1, float* f0, float* f1);
273 
274 PHILOX_DEVICE_INLINE
275 void BoxMullerDouble(uint32 x0, uint32 x1, uint32 x2, uint32 x3, double* d0,
276                      double* d1);
277 
278 // Exactly like the float version, except that we convert to half afterwards;
279 // since we don't have half-precision sin/cos even on GPUs, there's nothing to
280 // gain from working in half internally.
281 template <class Generator>
282 class NormalDistribution<Generator, Eigen::half> {
283  public:
284   // The number of elements that will be returned.
285   static const int kResultElementCount = Generator::kResultElementCount;
286   // Cost of generation of a single element (in cycles).
287   static const int kElementCost = 70;
288   // Indicate that this distribution may take variable number of samples
289   // during the runtime.
290   static const bool kVariableSamplesPerOutput = false;
291   typedef Array<Eigen::half, kResultElementCount> ResultType;
292   typedef Eigen::half ResultElementType;
293 
294   PHILOX_DEVICE_INLINE
operator()295   ResultType operator()(Generator* gen) {
296     typename Generator::ResultType sample = (*gen)();
297     ResultType result;
298     for (int i = 0; i < kResultElementCount; i += 2) {
299       float f[2];
300       BoxMullerFloat(sample[i], sample[i + 1], &f[0], &f[1]);
301       result[i] = Eigen::half(f[0]);
302       result[i + 1] = Eigen::half(f[1]);
303     }
304     return result;
305   }
306 };
307 
308 template <class Generator>
309 class NormalDistribution<Generator, float> {
310  public:
311   // The number of elements that will be returned.
312   static const int kResultElementCount = Generator::kResultElementCount;
313   // Cost of generation of a single element (in cycles).
314   static const int kElementCost = 70;
315   // Indicate that this distribution may take variable number of samples
316   // during the runtime.
317   static const bool kVariableSamplesPerOutput = false;
318   typedef Array<float, kResultElementCount> ResultType;
319   typedef float ResultElementType;
320 
321   PHILOX_DEVICE_INLINE
operator()322   ResultType operator()(Generator* gen) {
323     typename Generator::ResultType sample = (*gen)();
324     ResultType result;
325     for (int i = 0; i < kResultElementCount; i += 2) {
326       BoxMullerFloat(sample[i], sample[i + 1], &result[i], &result[i + 1]);
327     }
328     return result;
329   }
330 };
331 
332 template <class Generator>
333 class NormalDistribution<Generator, double> {
334  public:
335   // The number of elements that will be returned.
336   static const int kResultElementCount = Generator::kResultElementCount / 2;
337   // Cost of generation of a single element (in cycles).
338   static const int kElementCost = 70;
339   // Indicate that this distribution may take variable number of samples
340   // during the runtime.
341   static const bool kVariableSamplesPerOutput = false;
342   typedef Array<double, kResultElementCount> ResultType;
343   typedef double ResultElementType;
344 
345   PHILOX_DEVICE_INLINE
operator()346   ResultType operator()(Generator* gen) {
347     typename Generator::ResultType sample = (*gen)();
348     ResultType result;
349     for (int i = 0; i < kResultElementCount; i += 2) {
350       const int i2 = 2 * i;
351       BoxMullerDouble(sample[i2], sample[i2 + 1], sample[i2 + 2],
352                       sample[i2 + 3], &result[i], &result[i + 1]);
353     }
354     return result;
355   }
356 };
357 
358 // A class that returns standard normal distribution between
359 // [-kTruncateValue, kTruncateValue].
360 // Arguments:
361 //   Generator: a generator type that returns a number of uint32 upon each
362 //              each invocation. It needs to define kResultElementCount for the
363 //              sample count for each invocation, and ResultType for actual
364 //              returned sample type.
365 //   RealType: the data type of the real numbers that will be returned by the
366 //             distribution. This could be either float or double for now.
367 // This class is meant to be implemented through specialization. The default
368 // is not defined by design.
369 template <class SingleSampleGenerator, typename RealType>
370 class TruncatedNormalDistribution;
371 
372 // Exactly like the float version, except that we convert to half afterwards;
373 // since we don't have half-precision sin/cos even on GPUs, there's nothing to
374 // gain from working in half internally.
375 template <class SingleSampleGenerator>
376 class TruncatedNormalDistribution<SingleSampleGenerator, Eigen::half> {
377  public:
378   // The number of elements that will be returned.
379   static const int kResultElementCount =
380       SingleSampleGenerator::kNativeElementCount;
381   // Cost of generation of a single element (in cycles).
382   static const int kElementCost = 90;
383   // Indicate that this distribution may take variable number of samples
384   // during the runtime.
385   static const bool kVariableSamplesPerOutput = true;
386   // The threshold where the normal distribution is truncated.
387   const float kTruncateValue = 2.0f;
388 
389   typedef Array<Eigen::half, kResultElementCount> ResultType;
390   typedef Eigen::half ResultElementType;
391 
392   PHILOX_DEVICE_INLINE
operator()393   ResultType operator()(SingleSampleGenerator* gen) {
394     ResultType results;
395     int index = 0;
396     while (true) {
397       // Repeatedly take samples from the normal distribution, until we have
398       // the desired number of elements that fall within the pre-defined cutoff
399       // threshold.
400       const uint32 x0 = (*gen)();
401       const uint32 x1 = (*gen)();
402       float f[2];
403       BoxMullerFloat(x0, x1, &f[0], &f[1]);
404 
405       for (int i = 0; i < 2; ++i) {
406         if (Eigen::numext::abs(f[i]) < kTruncateValue) {
407           results[index++] = Eigen::half(f[i]);
408           if (index >= kResultElementCount) {
409             return results;
410           }
411         }
412       }
413     }
414   }
415 };
416 
417 // Partial specialization for float.
418 template <class SingleSampleGenerator>
419 class TruncatedNormalDistribution<SingleSampleGenerator, float> {
420  public:
421   // The number of elements that will be returned.
422   static const int kResultElementCount =
423       SingleSampleGenerator::kNativeElementCount;
424   // Cost of generation of a single element (in cycles).
425   static const int kElementCost = 90;
426   // Indicate that this distribution may take variable number of samples
427   // during the runtime.
428   static const bool kVariableSamplesPerOutput = true;
429   // The threshold where the normal distribution is truncated.
430   const float kTruncateValue = 2.0f;
431 
432   typedef Array<float, kResultElementCount> ResultType;
433   typedef float ResultElementType;
434 
435   PHILOX_DEVICE_INLINE
operator()436   ResultType operator()(SingleSampleGenerator* gen) {
437     ResultType results;
438     int index = 0;
439     while (true) {
440       // Repeatedly take samples from the normal distribution, until we have
441       // the desired number of elements that fall within the pre-defined cutoff
442       // threshold.
443       const uint32 x0 = (*gen)();
444       const uint32 x1 = (*gen)();
445       float f[2];
446       BoxMullerFloat(x0, x1, &f[0], &f[1]);
447 
448       for (int i = 0; i < 2; ++i) {
449         if (Eigen::numext::abs(f[i]) < kTruncateValue) {
450           results[index++] = f[i];
451           if (index >= kResultElementCount) {
452             return results;
453           }
454         }
455       }
456     }
457   }
458 };
459 
460 // Partial specialization for double.
461 template <class SingleSampleGenerator>
462 class TruncatedNormalDistribution<SingleSampleGenerator, double> {
463  public:
464   // The number of elements that will be returned.
465   static const int kResultElementCount =
466       (SingleSampleGenerator::kNativeElementCount > 1)
467           ? SingleSampleGenerator::kNativeElementCount / 2
468           : 1;
469   // Cost of generation of a single element (in cycles).
470   static const int kElementCost = 90;
471   // Indicate that this distribution may take variable number of samples
472   // during the runtime.
473   static const bool kVariableSamplesPerOutput = true;
474   typedef Array<double, kResultElementCount> ResultType;
475   typedef double ResultElementType;
476   const double kTruncateValue = 2.0;
477 
478   PHILOX_DEVICE_INLINE
operator()479   ResultType operator()(SingleSampleGenerator* gen) {
480     ResultType results;
481     int index = 0;
482     while (1) {
483       const uint32 x0 = (*gen)();
484       const uint32 x1 = (*gen)();
485       const uint32 x2 = (*gen)();
486       const uint32 x3 = (*gen)();
487       double d[2];
488       BoxMullerDouble(x0, x1, x2, x3, &d[0], &d[1]);
489 
490       for (int i = 0; i < 2; ++i) {
491         if (Eigen::numext::abs(d[i]) < kTruncateValue) {
492           results[index++] = d[i];
493           if (index >= kResultElementCount) {
494             return results;
495           }
496         }
497       }
498     }
499   }
500 };
501 
502 // Helper function to convert two 32-bit uniform integers to two floats
503 // under the unit normal distribution.
504 PHILOX_DEVICE_INLINE
BoxMullerFloat(uint32 x0,uint32 x1,float * f0,float * f1)505 void BoxMullerFloat(uint32 x0, uint32 x1, float* f0, float* f1) {
506   // This function implements the Box-Muller transform:
507   // http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform#Basic_form
508   // Do not send a really small number to log().
509   // We cannot mark "epsilon" as "static const" because NVCC would complain
510   const float epsilon = 1.0e-7f;
511   float u1 = Uint32ToFloat(x0);
512   if (u1 < epsilon) {
513     u1 = epsilon;
514   }
515   const float v1 = 2.0f * M_PI * Uint32ToFloat(x1);
516   const float u2 = Eigen::numext::sqrt(-2.0f * Eigen::numext::log(u1));
517 #if defined(TENSORFLOW_USE_SYCL) || !defined(__linux__)
518   *f0 = Eigen::numext::sin(v1);
519   *f1 = Eigen::numext::cos(v1);
520 #else
521   sincosf(v1, f0, f1);
522 #endif
523   *f0 *= u2;
524   *f1 *= u2;
525 }
526 
527 // Helper function to convert four 32-bit uniform integers to two doubles
528 // under the unit normal distribution.
529 PHILOX_DEVICE_INLINE
BoxMullerDouble(uint32 x0,uint32 x1,uint32 x2,uint32 x3,double * d0,double * d1)530 void BoxMullerDouble(uint32 x0, uint32 x1, uint32 x2, uint32 x3, double* d0,
531                      double* d1) {
532   // This function implements the Box-Muller transform:
533   // http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform#Basic_form
534   // Do not send a really small number to log().
535   // We cannot mark "epsilon" as "static const" because NVCC would complain
536   const double epsilon = 1.0e-7;
537   double u1 = Uint64ToDouble(x0, x1);
538   if (u1 < epsilon) {
539     u1 = epsilon;
540   }
541   const double v1 = 2 * M_PI * Uint64ToDouble(x2, x3);
542   const double u2 = Eigen::numext::sqrt(-2.0 * Eigen::numext::log(u1));
543 #if defined(TENSORFLOW_USE_SYCL) || !defined(__linux__)
544   *d0 = Eigen::numext::sin(v1);
545   *d1 = Eigen::numext::cos(v1);
546 #else
547   sincos(v1, d0, d1);
548 #endif
549   *d0 *= u2;
550   *d1 *= u2;
551 }
552 
553 // Helper function to convert an 16-bit integer to a half between [0..1).
Uint16ToHalf(uint16 x)554 PHILOX_DEVICE_INLINE Eigen::half Uint16ToHalf(uint16 x) {
555   // IEEE754 halfs are formatted as follows (MSB first):
556   //    sign(1) exponent(5) mantissa(10)
557   // Conceptually construct the following:
558   //    sign == 0
559   //    exponent == 15  -- an excess 15 representation of a zero exponent
560   //    mantissa == 10 random bits
561   const uint16 man = x & 0x3ffu;  // 10 bit mantissa
562   const uint16 exp = static_cast<uint16>(15);
563   const uint16 val = (exp << 10) | man;
564 
565   Eigen::half result;
566   result.x = val;
567   return result - Eigen::half(1.0);
568 }
569 
570 // Helper function to convert an 32-bit integer to a float between [0..1).
Uint32ToFloat(uint32 x)571 PHILOX_DEVICE_INLINE float Uint32ToFloat(uint32 x) {
572   // IEEE754 floats are formatted as follows (MSB first):
573   //    sign(1) exponent(8) mantissa(23)
574   // Conceptually construct the following:
575   //    sign == 0
576   //    exponent == 127  -- an excess 127 representation of a zero exponent
577   //    mantissa == 23 random bits
578   const uint32 man = x & 0x7fffffu;  // 23 bit mantissa
579   const uint32 exp = static_cast<uint32>(127);
580   const uint32 val = (exp << 23) | man;
581 
582   // Assumes that endian-ness is same for float and uint32.
583   float result;
584   memcpy(&result, &val, sizeof(val));
585   return result - 1.0f;
586 }
587 
588 // Helper function to convert two 32-bit integers to a double between [0..1).
Uint64ToDouble(uint32 x0,uint32 x1)589 PHILOX_DEVICE_INLINE double Uint64ToDouble(uint32 x0, uint32 x1) {
590   // IEEE754 doubles are formatted as follows (MSB first):
591   //    sign(1) exponent(11) mantissa(52)
592   // Conceptually construct the following:
593   //    sign == 0
594   //    exponent == 1023  -- an excess 1023 representation of a zero exponent
595   //    mantissa == 52 random bits
596   const uint32 mhi = x0 & 0xfffffu;  // upper 20 bits of mantissa
597   const uint32 mlo = x1;             // lower 32 bits of mantissa
598   const uint64 man = (static_cast<uint64>(mhi) << 32) | mlo;  // mantissa
599   const uint64 exp = static_cast<uint64>(1023);
600   const uint64 val = (exp << 52) | man;
601   // Assumes that endian-ness is same for double and uint64.
602   double result;
603   memcpy(&result, &val, sizeof(val));
604   return result - 1.0;
605 }
606 
607 }  // namespace random
608 }  // namespace tensorflow
609 
610 #endif  // TENSORFLOW_LIB_RANDOM_RANDOM_DISTRIBUTIONS_H_
611