1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_LAUNCH_DIMENSIONS_H_ 17 #define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_LAUNCH_DIMENSIONS_H_ 18 19 #include <map> 20 #include <memory> 21 22 #include "tensorflow/compiler/xla/service/gpu/gpu_device_info.h" 23 #include "tensorflow/compiler/xla/shape.h" 24 25 namespace xla { 26 namespace gpu { 27 28 // Encapsulates the launch dimensions of a kernel, e.g., the block count and the 29 // number of threads per block. 30 class LaunchDimensions { 31 public: 32 struct Dim3D { 33 int64 x, y, z; 34 }; 35 36 // The default constructor creates a launch dimension that indicate 37 // single-threaded execution. LaunchDimensions()38 LaunchDimensions() 39 : block_counts_({1, 1, 1}), thread_counts_per_block_({1, 1, 1}) {} 40 LaunchDimensions(int64 block_x_count,int64 thread_x_count_per_block)41 LaunchDimensions(int64 block_x_count, int64 thread_x_count_per_block) 42 : block_counts_({block_x_count, 1, 1}), 43 thread_counts_per_block_({thread_x_count_per_block, 1, 1}) {} 44 LaunchDimensions(const Dim3D & block_counts,const Dim3D & thread_counts_per_block)45 LaunchDimensions(const Dim3D& block_counts, 46 const Dim3D& thread_counts_per_block) 47 : block_counts_(block_counts), 48 thread_counts_per_block_(thread_counts_per_block) {} 49 block_counts()50 Dim3D block_counts() const { return block_counts_; } 51 thread_counts_per_block()52 Dim3D thread_counts_per_block() const { return thread_counts_per_block_; } 53 launch_bound()54 int64 launch_bound() const { 55 return block_counts_.x * thread_counts_per_block_.x * block_counts_.y * 56 thread_counts_per_block_.y * block_counts_.z * 57 thread_counts_per_block_.z; 58 } 59 60 private: 61 Dim3D block_counts_; 62 Dim3D thread_counts_per_block_; 63 }; 64 65 std::ostream& operator<<(std::ostream& out, 66 const LaunchDimensions& launch_dims); 67 68 LaunchDimensions CalculateLaunchDimensions(const Shape& shape, 69 GpuDeviceInfo gpu_device_info, 70 int unroll_factor = 1, 71 bool few_waves = false); 72 73 } // namespace gpu 74 } // namespace xla 75 76 #endif // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_LAUNCH_DIMENSIONS_H_ 77