/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// Types to express dimensionality of a kernel launch. Blocks and threads
// are (up to) 3-dimensional.
//
// A thread is conceptually like a SIMD lane. Some number of SIMD lanes,
// typically 32 (though that fact should not be relied on), are tied together
// with a single PC in a unit called a warp. There is a maximum number of
// threads that can execute in a shared-context entity called a block.
// Presently, that number is 1024 -- again, something that should not be relied
// on from this comment, but checked via stream_executor::DeviceDescription.
//
// For additional information, see
// http://docs.nvidia.com/cuda/kepler-tuning-guide/#device-utilization-and-occupancy
//
// Because of that modest thread-per-block limit, a kernel can be launched with
// multiple blocks. Each block is indivisibly scheduled onto a single core.
// Blocks can also be used in a multi-dimensional configuration, and the block
// count has much less modest limits -- typically they're similar to the maximum
// amount of addressable memory.
34 35 #ifndef TENSORFLOW_COMPILER_XLA_STREAM_EXECUTOR_LAUNCH_DIM_H_ 36 #define TENSORFLOW_COMPILER_XLA_STREAM_EXECUTOR_LAUNCH_DIM_H_ 37 38 #include <string> 39 40 #include "absl/strings/str_cat.h" 41 #include "tensorflow/compiler/xla/stream_executor/platform/port.h" 42 43 namespace stream_executor { 44 45 // Basic type that represents a 3-dimensional index space. 46 struct Dim3D { 47 uint64_t x, y, z; 48 Dim3DDim3D49 Dim3D(uint64_t x, uint64 y, uint64 z) : x(x), y(y), z(z) {} 50 }; 51 52 // Thread dimensionality for use in a kernel launch. See file comment for 53 // details. 54 struct ThreadDim : public Dim3D { 55 explicit ThreadDim(uint64_t x = 1, uint64 y = 1, uint64 z = 1) Dim3DThreadDim56 : Dim3D(x, y, z) {} 57 58 // Returns a string representation of the thread dimensionality. ToStringThreadDim59 std::string ToString() const { 60 return absl::StrCat("ThreadDim{", x, ", ", y, ", ", z, "}"); 61 } 62 }; 63 64 // Block dimensionality for use in a kernel launch. See file comment for 65 // details. 66 struct BlockDim : public Dim3D { 67 explicit BlockDim(uint64_t x = 1, uint64 y = 1, uint64 z = 1) Dim3DBlockDim68 : Dim3D(x, y, z) {} 69 70 // Returns a string representation of the block dimensionality. ToStringBlockDim71 std::string ToString() const { 72 return absl::StrCat("BlockDim{", x, ", ", y, ", ", z, "}"); 73 } 74 }; 75 76 } // namespace stream_executor 77 78 #endif // TENSORFLOW_COMPILER_XLA_STREAM_EXECUTOR_LAUNCH_DIM_H_ 79