1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_COMPILER_JIT_CLONE_CONSTANTS_FOR_BETTER_CLUSTERING_H_ 17 #define TENSORFLOW_COMPILER_JIT_CLONE_CONSTANTS_FOR_BETTER_CLUSTERING_H_ 18 19 #include "tensorflow/core/common_runtime/optimization_registry.h" 20 21 #include "absl/container/flat_hash_set.h" 22 #include "tensorflow/stream_executor/lib/statusor.h" 23 24 namespace tensorflow { 25 // Clones small host constants in the graph to make it easier to form larger 26 // clusters. 27 // 28 // This helps us in two ways: 29 // 30 // - It reduces dependencies between clusters. Let's say a constant C is used 31 // by nodes X and Y. If X and Y are put in different clusters (for whatever 32 // reason) Y's cluster now has to wait for all the operations in X's cluster 33 // to finish before it starts running. 34 // 35 // - It lets us create bigger clusters in multi-GPU benchmarks. Consider the 36 // following graph: 37 // 38 // digraph { 39 // Const -> GPU_1 40 // Const -> GPU_0_Y 41 // GPU_0_X -> GPU_0_Y 42 // } 43 // 44 // We'd cluster Const and GPU_1 together (and place it on GPU_1), and this 45 // will block us from clustering GPU_0_X and GPU_0_Y together since that 46 // would increase the amount of work on GPU 0 waiting on work on GPU 1. 47 // However, cloning Const into two copies, one for GPU_0_Y and one for GPU_1 48 // will let us create one cluster containing {Const/copy_0, GPU_1} and 49 // another containing {Const/copy_1, GPU_0_X, GPU_0_Y}. 50 // 51 // We only clone small host constants now to avoid increasing memory consumption 52 // too much. Moreover, in practice the constants we have to duplicate are 53 // things like the `perm` input to `Transpose` and the `size` input to `Slice` 54 // which tend to be small anyway. 55 56 class CloneConstantsForBetterClusteringPass : public GraphOptimizationPass { 57 public: 58 CloneConstantsForBetterClusteringPass() = default; 59 60 Status Run(const GraphOptimizationPassOptions& options) override; 61 }; 62 } // namespace tensorflow 63 64 #endif // TENSORFLOW_COMPILER_JIT_CLONE_CONSTANTS_FOR_BETTER_CLUSTERING_H_ 65