• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
15 
16 #ifndef TENSORFLOW_STREAM_EXECUTOR_TPU_C_API_DECL_H_
17 #define TENSORFLOW_STREAM_EXECUTOR_TPU_C_API_DECL_H_
18 
19 #include <stddef.h>
20 #include <stdint.h>
21 
22 #include "tensorflow/c/tf_attrtype.h"
23 #include "tensorflow/core/tpu/libtftpu.h"
24 
25 extern "C" {
26 
// Forward declaration of the status object. The struct is not defined in this
// header; the declarations here only ever refer to it through a pointer.
struct TF_Status;
typedef struct TF_Status TF_Status;
29 
// Maximum number of array elements to inline into structs for performance.
// This is the length of the `inlined` arrays in the list structs declared
// later in this header.
#define TPU_C_API_MAX_INLINED 6
32 
// Core kinds, as named by the enumerators. Values are implicit and therefore
// follow declaration order: 0, 1, 2.
// NOTE(review): if these values are exchanged across the library boundary,
// new entries should only ever be appended — confirm.
enum TpuCoreTypeEnum {
  kTensorCore,
  kEmbeddingV1,
  kEmbeddingV2,
};
38 
// TPU hardware generations, as named by the enumerators. Values are implicit
// and follow declaration order, so kUnknownTpuVersion is 0.
// NOTE(review): if these values are exchanged across the library boundary,
// new entries should only ever be appended — confirm.
enum TpuVersionEnum {
  kUnknownTpuVersion,
  kTpuV2,
  kTpuV3,
  kTpuV4,
};
45 
// Runtime version descriptor: a three-part numeric version plus a metadata
// character range.
// NOTE(review): this header does not say whether `metadata` is NUL-terminated
// or who owns it — confirm before copying or freeing it.
typedef struct TpuRuntimeVersion {
  // The three version numbers are: major, minor, patch
  int version[3];
  const char* metadata;
  size_t metadata_size;  // presumably the length of `metadata` — TODO confirm
} TpuRuntimeVersion;
52 
// Opaque handle types. Only the struct tags are declared in this header, so
// code including it can hold pointers to these types but cannot inspect or
// size them.
typedef struct SE_Platform SE_Platform;
typedef struct SE_StreamExecutor SE_StreamExecutor;
typedef struct SE_Stream SE_Stream;
typedef struct SE_Event SE_Event;
typedef struct SE_Timer SE_Timer;
58 
// A character buffer and its length. Elsewhere in this header it is the type
// of fields such as `device_assignment`, `debug_options` and `proto`.
// NOTE(review): ownership of `bytes` is not stated here — confirm who frees it.
typedef struct TpuSerializedProto {
  const char* bytes;
  size_t size;  // presumably the number of bytes at `bytes` — TODO confirm
} TpuSerializedProto;
63 
// Wraps a platform identifier as a single untyped pointer so it can be passed
// by value.
typedef struct SE_PlatformId {
  void* id;  // aka stream_executor::Platform::Id
} SE_PlatformId;
67 typedef struct SE_StreamExecutorConfig SE_StreamExecutorConfig;
68 typedef struct SE_DeviceOptions SE_DeviceOptions;
69 typedef TF_Status* (*SE_StatusCallbackFn)(void*);
70 
// Plain-data description of a memory region: one untyped pointer followed by
// two 64-bit unsigned values.
typedef struct SE_DeviceMemoryBase {
  void* opaque;      // presumably the device address — TODO confirm
  uint64_t size;     // presumably the region size in bytes — TODO confirm
  uint64_t payload;  // meaning is not visible from this header
} SE_DeviceMemoryBase;
76 
77 typedef struct SE_ScopedDeviceMemory {
78   SE_DeviceMemoryBase wrapped;
79   int device_ordinal;
80 } SE_ScopedDeviceMemory;
81 
// Snapshot of allocator counters, all stored as signed 64-bit integers.
// The two `has_*` flags precede the limit fields they are named after;
// presumably a limit is only meaningful when its flag is true — TODO confirm.
typedef struct SE_AllocatorStats {
  int64_t num_allocs;
  int64_t bytes_in_use;
  int64_t peak_bytes_in_use;
  int64_t largest_alloc_size;

  bool has_bytes_limit;
  int64_t bytes_limit;

  int64_t bytes_reserved;
  int64_t peak_bytes_reserved;

  bool has_bytes_reservable_limit;
  int64_t bytes_reservable_limit;

  int64_t largest_free_block_bytes;
} SE_AllocatorStats;
99 
100 // Note, due to the... odd way in which DeviceMemoryAllocator is used in TF, we
101 // cannot simply wrap an underlying pointer. Instead, we reverse the call
102 // direction and request memory via a callback.
103 typedef void (*SE_AllocateFn)(void* ctx, int device_ordinal, uint64_t size,
104                               bool retry_on_failure, int64_t memory_space,
105                               SE_ScopedDeviceMemory* result, TF_Status* status);
106 
107 typedef void (*SE_DeallocateFn)(void* ctx, SE_DeviceMemoryBase* base,
108                                 int device_ordinal, TF_Status* status);
109 
110 typedef struct SE_DeviceMemoryAllocator {
111   SE_Platform* platform;
112   void* ctx;
113   SE_AllocateFn allocate;
114   SE_DeallocateFn deallocate;
115 } SE_DeviceMemoryAllocator;
116 
// Flat, plain-data description of a device: six string pointers followed by
// numeric limits and identifiers.
// NOTE(review): ownership and lifetime of the `char*` members are not stated
// in this header — confirm who allocates and frees them.
typedef struct SE_DeviceDescription {
  char* device_vendor;
  char* platform_version;
  char* driver_version;
  char* runtime_version;
  char* pci_bus_id;
  char* name;

  int64_t thread_dim_limit_x;
  int64_t thread_dim_limit_y;
  int64_t thread_dim_limit_z;
  int64_t block_dim_limit_x;
  int64_t block_dim_limit_y;
  int64_t block_dim_limit_z;

  int64_t threads_per_core_limit;
  int64_t threads_per_block_limit;
  int64_t threads_per_warp;

  int64_t registers_per_core_limit;
  int64_t registers_per_block_limit;

  int64_t device_address_bits;
  int64_t device_memory_size;
  int64_t memory_bandwidth;

  int64_t shared_memory_per_core;
  int64_t shared_memory_per_block;

  float clock_rate_ghz;

  int cuda_compute_capability_major;
  int cuda_compute_capability_minor;

  int rocm_amdgpu_isa_version;
  char* rocm_amdgpu_gcn_arch_name;

  int numa_node;
  int core_count;
  bool ecc_enabled;
} SE_DeviceDescription;
158 
// Opaque handle types; only the struct tags are declared in this header.
typedef struct Tpu_Compiler Tpu_Compiler;
typedef struct SE_Executable SE_Executable;
161 
162 typedef struct SE_ExecutableRunOptions {
163   SE_DeviceMemoryAllocator allocator;
164   int device_ordinal;
165   SE_Stream* stream;
166   SE_Stream* host_to_device_stream;
167   TpuSerializedProto device_assignment;
168   int rng_seed;
169   int64_t run_id;
170   int launch_id;
171 } SE_ExecutableRunOptions;
172 
// Opaque handle type; only the struct tag is declared in this header.
typedef struct SE_ExecutableSerializationHandle
    SE_ExecutableSerializationHandle;
175 
176 typedef struct SE_MaybeOwningDeviceMemory {
177   SE_DeviceMemoryBase memory;
178   bool owned;
179 
180   // Set if owned
181   int device_ordinal;
182   SE_DeviceMemoryAllocator allocator;
183 } SE_MaybeOwningDeviceMemory;
184 
185 struct Int64List {
186   union {
187     int64_t* heap;  // owned
188     int64_t inlined[TPU_C_API_MAX_INLINED];
189   };
190   int64_t size;
191 };
192 
193 struct BoolList {
194   union {
195     bool* heap;  // owned
196     bool inlined[TPU_C_API_MAX_INLINED];
197   };
198   int64_t size;
199 };
200 
201 typedef struct XLA_Tile {
202   Int64List dimensions;
203 } XLA_Tile;
204 
205 struct TileList {
206   union {
207     XLA_Tile* heap;  // owned
208     XLA_Tile inlined[TPU_C_API_MAX_INLINED];
209   };
210   int64_t size;
211 };
212 
213 typedef struct XLA_Layout {
214   int format;
215   Int64List minor_to_major;
216   TileList tiles;
217   int64_t element_size_in_bits;
218   int64_t memory_space;
219 } XLA_Layout;
220 
221 // Represents an XLA shape tree.
222 typedef struct XLA_Shape {
223   int element_type;
224   Int64List dimensions;
225   BoolList dynamic_dimensions;
226   XLA_Shape* tuple_shapes;  // owned
227   int ntuple_shapes;
228   XLA_Layout layout;
229 } XLA_Shape;
230 
231 // Represents a leaf node for a XLA shaped buffer.
232 typedef struct XLA_ShapedBuffer {
233   XLA_Shape on_device_shape;
234   int device_ordinal;
235 
236   SE_DeviceMemoryBase* bases;
237   size_t count;
238 } XLA_ShapedBuffer;
239 
240 // Represents a leaf XLA literal.
241 typedef struct XLA_Literal {
242   char** buffers;
243   size_t* sizes;
244   size_t count;
245   XLA_Shape shape;
246 } XLA_Literal;
247 
248 typedef struct XLA_MaybeOwningDeviceMemoryShapeTree {
249   XLA_Shape shape;
250   SE_MaybeOwningDeviceMemory* buffers;
251 } XLA_MaybeOwningDeviceMemoryShapeTree;
252 
// Fixed-capacity index path: up to 8 int64_t entries stored inline, followed
// by a count.
// NOTE(review): presumably `count` is the number of valid entries in `indices`
// and must not exceed 8 — confirm.
typedef struct XLA_ShapeIndex {
  int64_t indices[8];
  int64_t count;
} XLA_ShapeIndex;
257 
258 typedef struct SE_ExecutionInput {
259   XLA_MaybeOwningDeviceMemoryShapeTree shape_tree;
260   XLA_ShapeIndex* unowned_indices;
261   int unowned_indices_size;
262   XLA_Shape dynamic_shape;
263 } SE_ExecutionInput;
264 
265 typedef struct SE_ExecutionOutput {
266   XLA_ShapedBuffer result;
267   SE_MaybeOwningDeviceMemory* to_be_released;
268   int to_be_released_size;
269   XLA_ShapeIndex* aliased_indices;
270   int aliased_indices_size;
271 } SE_ExecutionOutput;
272 
273 typedef struct XLA_ComputationLayout {
274   int parameter_count;
275   XLA_Shape* parameter_layouts;
276   XLA_Shape result_layout;
277 } XLA_ComputationLayout;
278 
279 typedef struct XLA_HloModuleConfig {
280   uint64_t seed;
281   int32_t launch_id;
282   int64_t replica_count;
283   int64_t num_partitions;
284   bool use_spmd_partitioning;
285   TpuSerializedProto debug_options;
286   bool has_static_device_assignment;
287   TpuSerializedProto static_device_assignment;
288   bool has_entry_computation_layout;
289   XLA_ComputationLayout entry_computation_layout;
290 } XLA_HloModuleConfig;
291 
// Opaque handle type; only the struct tag is declared in this header.
typedef struct SE_HloExecutionProfile SE_HloExecutionProfile;
293 
294 struct SE_StreamExecutorList {
295   SE_StreamExecutor** exec;
296   int count;
297 };
298 
299 typedef struct XLA_HloModuleGroup {
300   TpuSerializedProto proto;
301   XLA_HloModuleConfig* module_config;
302 } XLA_HloModuleGroup;
303 
304 typedef struct XLA_HloModule {
305   TpuSerializedProto proto;
306   XLA_HloModuleConfig module_config;
307 } XLA_HloModule;
308 
// Opaque handle types; only the struct tags are declared in this header.
typedef struct XLA_TransferManager XLA_TransferManager;

typedef struct XLA_ComputationPlacer XLA_ComputationPlacer;
312 
313 typedef void (*XLA_CallbackFn)(void*);
314 typedef void (*XLA_StatusCallbackFn)(void*, TF_Status*);
315 
// Opaque topology handle types; only the struct tags are declared here.
typedef struct SE_TpuTopology SE_TpuTopology;
typedef struct SE_TpuTopology_Core SE_TpuTopology_Core;
// NOTE(review): SE_TpuTopology_Host is an alias of struct SE_TpuTopology_Core,
// not a distinct struct, so the two names are interchangeable to the compiler.
// Confirm this is intentional before relying on it for type safety.
typedef struct SE_TpuTopology_Core SE_TpuTopology_Host;
319 }
320 
321 #endif  // TENSORFLOW_STREAM_EXECUTOR_TPU_C_API_DECL_H_
322