• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_COMPILER_JIT_CLONE_CONSTANTS_FOR_BETTER_CLUSTERING_H_
17 #define TENSORFLOW_COMPILER_JIT_CLONE_CONSTANTS_FOR_BETTER_CLUSTERING_H_
18 
19 #include "tensorflow/core/common_runtime/optimization_registry.h"
20 
21 #include "absl/container/flat_hash_set.h"
22 #include "tensorflow/stream_executor/lib/statusor.h"
23 
24 namespace tensorflow {
25 // Clones small host constants in the graph to make it easier to form larger
26 // clusters.
27 //
28 // This helps us in two ways:
29 //
30 //  - It reduces dependencies between clusters.  Let's say a constant C is used
31 //    by nodes X and Y.  If X and Y are put in different clusters (for whatever
32 //    reason) Y's cluster now has to wait for all the operations in X's cluster
33 //    to finish before it starts running.
34 //
35 //  - It lets us create bigger clusters in multi-GPU benchmarks.  Consider the
36 //    following graph:
37 //
38 //    digraph {
39 //      Const -> GPU_1
40 //      Const -> GPU_0_Y
41 //      GPU_0_X -> GPU_0_Y
42 //    }
43 //
44 //    We'd cluster Const and GPU_1 together (and place it on GPU_1), and this
45 //    will block us from clustering GPU_0_X and GPU_0_Y together since that
46 //    would increase the amount of work on GPU 0 waiting on work on GPU 1.
47 //    However, cloning Const into two copies, one for GPU_0_Y and one for GPU_1
48 //    will let us create one cluster containing {Const/copy_0, GPU_1} and
49 //    another containing {Const/copy_1, GPU_0_X, GPU_0_Y}.
50 //
51 // For now, we only clone small host constants, to avoid increasing memory
52 // consumption too much.  Moreover, in practice the constants we have to
53 // duplicate are things like the `perm` input to `Transpose` and the `size`
54 // input to `Slice`, which tend to be small anyway.
55 
// Graph optimization pass that performs the constant cloning described in the
// comment at the top of this namespace.  Only the interface is declared in this
// header; the definition of Run() lives elsewhere.
56 class CloneConstantsForBetterClusteringPass : public GraphOptimizationPass {
57  public:
   // Defaulted constructor: the class declares no data members in this header.
58   CloneConstantsForBetterClusteringPass() = default;
59 
   // Entry point of the pass; overrides the base-class virtual Run().
   // `options` is taken by const reference and the outcome is reported through
   // the returned Status.
   // NOTE(review): presumably the graph to rewrite is reached through
   // `options` -- confirm against GraphOptimizationPassOptions.
60   Status Run(const GraphOptimizationPassOptions& options) override;
61 };
62 }  // namespace tensorflow
63 
64 #endif  // TENSORFLOW_COMPILER_JIT_CLONE_CONSTANTS_FOR_BETTER_CLUSTERING_H_
65