• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_CORE_KERNELS_RANDOM_OP_CPU_H_
17 #define TENSORFLOW_CORE_KERNELS_RANDOM_OP_CPU_H_
18 
19 #define EIGEN_USE_THREADS
20 
21 #include <algorithm>
22 #include <cmath>
23 #include <memory>
24 
25 #include "tensorflow/core/framework/op_kernel.h"
26 #include "tensorflow/core/framework/register_types.h"
27 #include "tensorflow/core/framework/tensor.h"
28 #include "tensorflow/core/framework/tensor_shape.h"
29 #include "tensorflow/core/kernels/random_op.h"
30 #include "tensorflow/core/kernels/random_ops_util.h"
31 #include "tensorflow/core/lib/hash/crc32c.h"
32 #include "tensorflow/core/lib/random/random_distributions.h"
33 #include "tensorflow/core/lib/random/simple_philox.h"
34 #include "tensorflow/core/platform/logging.h"
35 #include "tensorflow/core/util/guarded_philox_random.h"
36 #include "tensorflow/core/util/work_sharder.h"
37 
// Bracketing macros around code that compares floating-point values with
// `==`. When EIGEN_COMP_GNUC is non-zero and the language level is newer than
// C++98, DISABLE_FLOAT_EQUALITY_WARNING saves the current GCC diagnostic state
// and ignores -Wfloat-equal, and ENABLE_FLOAT_EQUALITY_WARNING restores the
// saved state. Otherwise both macros expand to nothing.
#if EIGEN_COMP_GNUC && __cplusplus > 199711L
#define DISABLE_FLOAT_EQUALITY_WARNING \
  _Pragma("GCC diagnostic push")       \
  _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"")
#define ENABLE_FLOAT_EQUALITY_WARNING _Pragma("GCC diagnostic pop")
#else
#define DISABLE_FLOAT_EQUALITY_WARNING
#define ENABLE_FLOAT_EQUALITY_WARNING
#endif
47 
48 namespace tensorflow {
49 
50 typedef Eigen::ThreadPoolDevice CPUDevice;
51 typedef Eigen::GpuDevice GPUDevice;
52 
53 namespace functor {
54 using random::PhiloxRandom;
55 using random::SingleSampleAdapter;
56 
57 // The default implementation of the functor, which should never be invoked
58 // But we still need to provide implementation for now for the linker to work,
59 // since we do not support all the distributions yet.
60 template <typename Device, class Distribution>
61 struct FillPhiloxRandom {
62   typedef typename Distribution::ResultElementType T;
operatorFillPhiloxRandom63   void operator()(OpKernelContext* ctx, const Device&, const uint64* key,
64                   const uint64* counter, random::PhiloxRandom gen, T* data,
65                   int64_t size, Distribution dist) {
66     OP_REQUIRES(
67         ctx, false,
68         errors::Internal(
69             "Default `FillPhiloxRandom` implementation should not be executed. "
70             "The cause of this error is probably that `FillPhiloxRandom` does "
71             "not support this device or random distribution yet."));
72   }
73 };
74 
75 // A class to fill a specified range of random groups
76 template <class Distribution, bool VariableSamplesPerOutput>
77 struct FillPhiloxRandomTask;
78 
79 // Specialization for distribution that takes a fixed number of samples for
80 // each output.
81 template <class Distribution>
82 struct FillPhiloxRandomTask<Distribution, false> {
83   typedef typename Distribution::ResultElementType T;
84   static void Run(random::PhiloxRandom gen, T* data, int64_t size,
85                   int64_t start_group, int64_t limit_group, Distribution dist) {
86     const int kGroupSize = Distribution::kResultElementCount;
87 
88     gen.Skip(start_group);
89     int64_t offset = start_group * kGroupSize;
90 
91     // First fill all the full-size groups
92     int64_t limit_group_full = std::min(limit_group, size / kGroupSize);
93     for (int64_t index = start_group; index < limit_group_full; ++index) {
94       auto samples = dist(&gen);
95       std::copy(&samples[0], &samples[0] + kGroupSize, data + offset);
96       offset += kGroupSize;
97     }
98 
99     // If there are any remaining elements that need to be filled, process them
100     if (limit_group_full < limit_group) {
101       int64_t remaining_size = size - limit_group_full * kGroupSize;
102       auto samples = dist(&gen);
103       std::copy(&samples[0], &samples[0] + remaining_size, data + offset);
104     }
105   }
106 };
107 
108 // Specialization for distribution that takes a variable number of samples for
109 // each output. This will be slower due to the generality.
110 template <class Distribution>
111 struct FillPhiloxRandomTask<Distribution, true> {
112   typedef typename Distribution::ResultElementType T;
113   static constexpr int64_t kReservedSamplesPerOutput = 256;
114 
115   static void Run(random::PhiloxRandom base_gen, T* data, int64_t size,
116                   int64_t start_group, int64_t limit_group, Distribution dist) {
117     const int kGroupSize = Distribution::kResultElementCount;
118 
119     static const int kGeneratorSkipPerOutputGroup =
120         kGroupSize * kReservedSamplesPerOutput /
121         PhiloxRandom::kResultElementCount;
122 
123     int64_t offset = start_group * kGroupSize;
124 
125     // First fill all the full-size groups
126     int64_t limit_group_full = std::min(limit_group, size / kGroupSize);
127     int64_t group_index;
128     for (group_index = start_group; group_index < limit_group_full;
129          ++group_index) {
130       // Reset the generator to the beginning of the output group region
131       // This is necessary if we want the results to be independent of order
132       // of work
133       PhiloxRandom gen = base_gen;
134       gen.Skip(group_index * kGeneratorSkipPerOutputGroup);
135       SingleSampleAdapter<PhiloxRandom> single_samples(&gen);
136 
137       auto samples = dist(&single_samples);
138       std::copy(&samples[0], &samples[0] + kGroupSize, data + offset);
139       offset += kGroupSize;
140     }
141 
142     // If there are any remaining elements that need to be filled, process them
143     if (limit_group_full < limit_group) {
144       PhiloxRandom gen = base_gen;
145       gen.Skip(group_index * kGeneratorSkipPerOutputGroup);
146       SingleSampleAdapter<PhiloxRandom> single_samples(&gen);
147 
148       int64_t remaining_size = size - limit_group_full * kGroupSize;
149       auto samples = dist(&single_samples);
150       std::copy(&samples[0], &samples[0] + remaining_size, data + offset);
151     }
152   }
153 };
154 
155 // Partial specialization for CPU to fill the entire region with randoms
156 // It splits the work into several tasks and run them in parallel
157 template <class Distribution>
158 void FillPhiloxRandom<CPUDevice, Distribution>::operator()(
159     OpKernelContext* ctx, const CPUDevice&, const uint64* key,
160     const uint64* counter, random::PhiloxRandom gen,
161     typename Distribution::ResultElementType* data, int64_t size,
162     Distribution dist) {
163   const int kGroupSize = Distribution::kResultElementCount;
164 
165   auto worker_threads = *(ctx->device()->tensorflow_cpu_worker_threads());
166 
167   int64_t total_group_count = (size + kGroupSize - 1) / kGroupSize;
168 
169   const int kGroupCost =
170       random::PhiloxRandom::kResultElementCount *
171       (random::PhiloxRandom::kElementCost + Distribution::kElementCost);
172 
173   if (key != nullptr && counter != nullptr) {
174     gen = GetPhiloxRandomFromCounterKeyMem(counter, key);
175   }
176 
177   Shard(worker_threads.num_threads, worker_threads.workers, total_group_count,
178         kGroupCost,
179         [&gen, data, size, dist](int64_t start_group, int64_t limit_group) {
180           FillPhiloxRandomTask<
181               Distribution,
182               Distribution::kVariableSamplesPerOutput>::Run(gen, data, size,
183                                                             start_group,
184                                                             limit_group, dist);
185         });
186 }
187 
188 }  // namespace functor
189 
190 
191 }  // end namespace tensorflow
192 
193 #endif  // TENSORFLOW_CORE_KERNELS_RANDOM_OP_CPU_H_
194