• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 // Types to express dimensionality of a kernel launch. Blocks and threads
17 // are (up to) 3-dimensional.
18 //
19 // A thread is conceptually like a SIMD lane. Some number, typically 32
20 // (though that fact should not be relied on) SIMD lanes are tied together with
21 // a single PC in a unit called a warp. There is a maximum number of threads
22 // that can execute in a shared-context entity called a block. Presently, that
23 // number is 1024 -- again, something that should not be relied on from this
24 // comment, but checked via stream_executor::DeviceDescription.
25 //
26 // For additional information, see
27 // http://docs.nvidia.com/cuda/kepler-tuning-guide/#device-utilization-and-occupancy
28 //
29 // Because of that modest thread-per-block limit, a kernel can be launched with
30 // multiple blocks. Each block is indivisibly scheduled onto a single core.
31 // Blocks can also be used in a multi-dimensional configuration, and the block
32 // count has much less modest limits -- typically they're similar to the maximum
33 // amount of addressable memory.
34 
35 #ifndef TENSORFLOW_COMPILER_XLA_STREAM_EXECUTOR_LAUNCH_DIM_H_
36 #define TENSORFLOW_COMPILER_XLA_STREAM_EXECUTOR_LAUNCH_DIM_H_
37 
38 #include <string>
39 
40 #include "absl/strings/str_cat.h"
41 #include "tensorflow/compiler/xla/stream_executor/platform/port.h"
42 
43 namespace stream_executor {
44 
// Basic type that represents a 3-dimensional index space.
//
// Holds one unsigned 64-bit extent per axis. The type has no default
// constructor: all three extents must be supplied explicitly.
struct Dim3D {
  // Extents along the x, y and z axes respectively.
  uint64_t x, y, z;

  // Constructs an index space from the three given extents. All parameters use
  // the standard fixed-width `uint64_t` so that they match the member types
  // exactly and do not depend on a project-specific `uint64` alias.
  Dim3D(uint64_t x, uint64_t y, uint64_t z) : x(x), y(y), z(z) {}
};
51 
52 // Thread dimensionality for use in a kernel launch. See file comment for
53 // details.
54 struct ThreadDim : public Dim3D {
55   explicit ThreadDim(uint64_t x = 1, uint64 y = 1, uint64 z = 1)
Dim3DThreadDim56       : Dim3D(x, y, z) {}
57 
58   // Returns a string representation of the thread dimensionality.
ToStringThreadDim59   std::string ToString() const {
60     return absl::StrCat("ThreadDim{", x, ", ", y, ", ", z, "}");
61   }
62 };
63 
64 // Block dimensionality for use in a kernel launch. See file comment for
65 // details.
66 struct BlockDim : public Dim3D {
67   explicit BlockDim(uint64_t x = 1, uint64 y = 1, uint64 z = 1)
Dim3DBlockDim68       : Dim3D(x, y, z) {}
69 
70   // Returns a string representation of the block dimensionality.
ToStringBlockDim71   std::string ToString() const {
72     return absl::StrCat("BlockDim{", x, ", ", y, ", ", z, "}");
73   }
74 };
75 
76 }  // namespace stream_executor
77 
78 #endif  // TENSORFLOW_COMPILER_XLA_STREAM_EXECUTOR_LAUNCH_DIM_H_
79