• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_EXECUTOR_C_API_H_
17 #define TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_EXECUTOR_C_API_H_
18 
19 #include <stddef.h>
20 #include <stdint.h>
21 
22 #include "tensorflow/c/tf_attrtype.h"
23 #include "tensorflow/core/tpu/libtftpu.h"
24 #include "tensorflow/stream_executor/tpu/c_api_decl.h"
25 
26 extern "C" {
27 
// TpuPlatform_* entry points. TpuPlatform_New returns an SE_Platform*; every
// other function in this group takes an SE_Platform* as its first argument.
// NOTE(review): presumably TpuPlatform_Free releases a handle obtained from
// TpuPlatform_New -- confirm against the implementation.
28 SE_Platform* TpuPlatform_New();
29 void TpuPlatform_Free(SE_Platform* platform);
// `options_size` is passed together with `options_key` and `options_value`;
// presumably these are parallel arrays of that length -- TODO confirm.
30 void TpuPlatform_Initialize(SE_Platform* platform, size_t options_size,
31                             const char** options_key,
32                             const char** options_value, TF_Status* status);
33 bool TpuPlatform_Initialized(SE_Platform* platform);
34 SE_StreamExecutor* TpuPlatform_GetExecutor(SE_Platform* platform,
35                                            SE_StreamExecutorConfig* config,
36                                            TF_Status* status);
37 SE_PlatformId TpuPlatform_Id(SE_Platform* platform);
38 int64_t TpuPlatform_VisibleDeviceCount(SE_Platform* platform);
// NOTE(review): the unit of the returned limit is not stated in this header
// (presumably bytes) -- confirm.
39 int64_t TpuPlatform_TpuMemoryLimit(SE_Platform* platform);
40 bool TpuPlatform_ShouldRegisterTpuDeviceToDeviceCopy(SE_Platform* platform);
// NOTE(review): ownership of the pointers returned by the two getters below
// is not stated in this header -- confirm before freeing them.
41 SE_TpuTopology* TpuPlatform_GetTopologyPtr(SE_Platform* platform);
42 SE_TpuTopology_Host* TpuPlatform_GetHostLocation(SE_Platform* platform);
43 TpuRuntimeVersion TpuPlatform_GetRuntimeVersion(SE_Platform* platform);
44 
// TpuExecutor_* initialization and device-memory entry points. Each takes the
// SE_StreamExecutor* it operates on as its first argument.
45 void TpuExecutor_Init(SE_StreamExecutor* executor, int device_ordinal,
46                       SE_DeviceOptions* device_options, TF_Status* status);
47 void TpuExecutor_Free(SE_StreamExecutor* executor);
48
49 int TpuExecutor_PlatformDeviceCount(SE_StreamExecutor* executor);
50
// Returns an SE_DeviceMemoryBase by value and takes no TF_Status*.
// NOTE(review): `size` is presumably a byte count, and how allocation failure
// is reported is not visible here -- confirm.
51 SE_DeviceMemoryBase TpuExecutor_Allocate(SE_StreamExecutor* executor,
52                                          uint64_t size, int64_t memory_space);
53 void TpuExecutor_Deallocate(SE_StreamExecutor* executor,
54                             SE_DeviceMemoryBase* memory);
// NOTE(review): the two functions below take non-const pointers, presumably
// output parameters filled in on a `true` return -- confirm.
55 bool TpuExecutor_GetAllocatorStats(SE_StreamExecutor* executor,
56                                    SE_AllocatorStats* stats);
57 bool TpuExecutor_DeviceMemoryUsage(SE_StreamExecutor* executor, int64_t* free,
58                                    int64_t* total);
59 
// TpuExecutor_* stream, event and timer entry points. The stream and timer
// functions return bool, while most event functions report through a
// TF_Status* parameter instead.
// NOTE(review): the meaning of the bool results is not stated in this header
// (presumably true on success) -- confirm.
60 bool TpuExecutor_AllocateStream(SE_StreamExecutor* executor, SE_Stream* stream);
61 void TpuExecutor_DeallocateStream(SE_StreamExecutor* executor,
62                                   SE_Stream* stream);
63 bool TpuExecutor_CreateStreamDependency(SE_StreamExecutor* executor,
64                                         SE_Stream* dependent, SE_Stream* other);
65 void TpuExecutor_GetStatus(SE_StreamExecutor* executor, SE_Stream* stream,
66                            TF_Status* status);
67
68 SE_TpuTopology_Core* TpuExecutor_GetCoreLocation(SE_StreamExecutor* executor);
69
70 void TpuExecutor_AllocateEvent(SE_StreamExecutor* executor, SE_Event* event,
71                                TF_Status* status);
72 void TpuExecutor_DeallocateEvent(SE_StreamExecutor* executor, SE_Event* event,
73                                  TF_Status* status);
// NOTE(review): the int result is presumably an event-status enum value whose
// definition lives outside this header -- confirm.
74 int TpuExecutor_PollForEventStatus(SE_StreamExecutor* executor,
75                                    SE_Event* event);
76 void TpuExecutor_RecordEvent(SE_StreamExecutor* executor, SE_Stream* stream,
77                              SE_Event* event, TF_Status* status);
78 void TpuExecutor_WaitForEvent(SE_StreamExecutor* executor, SE_Stream* stream,
79                               SE_Event* event, TF_Status* status);
80
81 bool TpuExecutor_AllocateTimer(SE_StreamExecutor* executor, SE_Timer* timer);
82 void TpuExecutor_DeallocateTimer(SE_StreamExecutor* executor, SE_Timer* timer);
83 bool TpuExecutor_StartTimer(SE_StreamExecutor* executor, SE_Stream* stream,
84                             SE_Timer* timer);
85 bool TpuExecutor_StopTimer(SE_StreamExecutor* executor, SE_Stream* stream,
86                            SE_Timer* timer);
87 
// TpuExecutor_* host/device copy and infeed/outfeed entry points.
// The Synchronous* copies take a TF_Status*; the variants that also take an
// SE_Stream* return bool instead. In both directions the source side is
// const-qualified (`device_src` when copying to host, `host_src` when copying
// from host).
// NOTE(review): `size` is presumably a byte count -- confirm.
88 void TpuExecutor_SynchronousMemcpyToHost(SE_StreamExecutor* executor,
89                                          void* host_dst,
90                                          const SE_DeviceMemoryBase* device_src,
91                                          uint64_t size, TF_Status* status);
92 void TpuExecutor_SynchronousMemcpyFromHost(SE_StreamExecutor* executor,
93                                            SE_DeviceMemoryBase* device_dst,
94                                            const void* host_src, uint64_t size,
95                                            TF_Status* status);
96 bool TpuExecutor_MemcpyToHost(SE_StreamExecutor* executor, SE_Stream* stream,
97                               void* host_dst,
98                               const SE_DeviceMemoryBase* device_src,
99                               uint64_t size);
100
101 bool TpuExecutor_MemcpyFromHost(SE_StreamExecutor* executor, SE_Stream* stream,
102                                 SE_DeviceMemoryBase* device_dst,
103                                 const void* host_src, uint64_t size);
104
// Infeed takes a const `data` buffer and outfeed a mutable one; both take a
// signed int64_t `size`, unlike the uint64_t sizes of the copy functions
// above.
105 void TpuExecutor_EnqueueInfeed(SE_StreamExecutor* executor,
106                                int32_t infeed_queue_index, const uint8_t* data,
107                                int64_t size, TF_Status* status);
108 void TpuExecutor_DequeueOutfeed(SE_StreamExecutor* executor,
109                                 int32_t outfeed_queue_index, uint8_t* data,
110                                 int64_t size, TF_Status* status);
111 void TpuExecutor_WaitForInfeedReady(SE_StreamExecutor* executor,
112                                     int32_t infeed_queue_index,
113                                     TF_Status* status);
114 void TpuExecutor_WaitForOutfeedReady(SE_StreamExecutor* executor,
115                                      int32_t outfeed_queue_index,
116                                      TF_Status* status);
117 
// TpuExecutor_* blocking, synchronization and program-management entry
// points. TpuExecutor_BlockHostUntilDone is scoped to one SE_Stream*; the
// other blocking/synchronization functions take only the executor.
// NOTE(review): the blocking and failure semantics are not described in this
// header -- consult the implementation before relying on them.
118 void TpuExecutor_BlockHostUntilDone(SE_StreamExecutor* executor,
119                                     SE_Stream* stream, TF_Status* status);
120 void TpuExecutor_BlockUntilDoneOrFailed(SE_StreamExecutor* executor,
121                                         TF_Status* status);
122 void TpuExecutor_SyncAndForgetFailedStreams(SE_StreamExecutor* executor);
123 bool TpuExecutor_SynchronizeAllActivity(SE_StreamExecutor* executor);
124
125 void TpuExecutor_UnloadAllPrograms(SE_StreamExecutor* executor,
126                                    TF_Status* status);
127 void TpuExecutor_EnqueueCompactionOnStreamForHbm(SE_StreamExecutor* executor,
128                                                  SE_Stream* compaction_stream,
129                                                  TF_Status* status);
130 
// TpuStream_* entry points. TpuStream_New takes the parent SE_StreamExecutor*
// and returns an SE_Stream*; the remaining functions operate on SE_Stream*.
// NOTE(review): presumably TpuStream_Free releases a stream obtained from
// TpuStream_New -- confirm.
131 SE_Stream* TpuStream_New(SE_StreamExecutor* parent);
132 void TpuStream_Free(SE_Stream*);
// NOTE(review): what the returned void* points to is not stated here --
// confirm before casting it.
133 void* TpuStream_Stream(SE_Stream*);
134 bool TpuStream_Status(SE_Stream*);
135 bool TpuStream_IsSameSharedMemoryLocation(SE_Stream*, SE_Stream*);
// The transfer functions below take SE_DeviceMemoryBase by value, whereas the
// TpuExecutor_Memcpy* declarations in this header take it by pointer.
// NOTE(review): `host_src` is a non-const void* here although
// TpuExecutor_MemcpyFromHost declares its `host_src` as const void*; whether
// the source buffer may be written to is not visible here -- confirm.
136 void TpuStream_EnqueueTransferHostToDevice(SE_Stream* stream,
137                                            SE_DeviceMemoryBase device_dst,
138                                            void* host_src, uint64_t size,
139                                            TF_Status* status);
140 void TpuStream_EnqueueTransferDeviceToHost(SE_Stream* stream,
141                                            SE_DeviceMemoryBase device_src,
142                                            void* host_dst, uint64_t size,
143                                            TF_Status* status);
144 void TpuStream_TpuEnqueueOnDeviceSendRecvLocal(SE_Stream* stream,
145                                                SE_DeviceMemoryBase send_buffer,
146                                                SE_DeviceMemoryBase recv_buffer,
147                                                TF_Status* status);
148 
// TpuEvent_* and TpuTimer_* entry points. Both *_New functions take the parent
// SE_StreamExecutor* and return a handle; the matching *_Free takes that
// handle type.
// NOTE(review): presumably each *_Free releases what the matching *_New
// returned -- confirm.
149 SE_Event* TpuEvent_New(SE_StreamExecutor* parent);
150 void TpuEvent_Free(SE_Event*);
151
152 SE_Timer* TpuTimer_New(SE_StreamExecutor* parent);
153 void TpuTimer_Free(SE_Timer*);
// NOTE(review): presumably these report the elapsed time measured by the
// timer in the unit named by the function -- confirm.
154 int64_t TpuTimer_Nanoseconds(SE_Timer*);
155 int64_t TpuTimer_Microseconds(SE_Timer*);
156 
// TpuStatus_* entry points for creating, mutating, inspecting and freeing
// TF_Status objects. Both TpuStatus_New and TpuStatus_Create return a
// TF_Status*; TpuStatus_Create additionally takes a code and message.
// NOTE(review): presumably every TF_Status* returned here must be released
// with TpuStatus_Free -- confirm.
157 TF_Status* TpuStatus_New();
158 TF_Status* TpuStatus_Create(int32_t code, const char* msg);
// NOTE(review): `len` is presumably the length of `msg`; whether `msg` must
// also be NUL-terminated is not stated here -- confirm.
159 void TpuStatus_Set(TF_Status* status, int32_t code, const char* msg,
160                    int32_t len);
161 void TpuStatus_Free(TF_Status* status);
// NOTE(review): the lifetime of the returned string is not stated here
// (presumably tied to `status`) -- confirm.
162 const char* TpuStatus_Message(TF_Status* status);
163 int TpuStatus_Code(TF_Status* status);
164 bool TpuStatus_Ok(TF_Status* status);
165 
// Helpers for the auxiliary objects passed to the platform/executor entry
// points: SE_StreamExecutorConfig, SE_DeviceDescription and SE_DeviceOptions,
// each with a constructor-style function returning a pointer and a matching
// *_Free / *FreeDeviceOptions function taking that pointer.
// NOTE(review): presumably each free function releases what the matching
// constructor returned -- confirm.
166 SE_StreamExecutorConfig* TpuStreamExecutorConfig_Default();
167 void TpuStreamExecutorConfig_SetOrdinal(SE_StreamExecutorConfig*, int ordinal);
168 void TpuStreamExecutorConfig_Free(SE_StreamExecutorConfig*);
169
170 SE_DeviceDescription* TpuDeviceDescription_New();
171 void TpuDeviceDescription_Free(SE_DeviceDescription* description);
// Takes an existing SE_DeviceDescription* rather than returning a new one.
172 void TpuExecutor_CreateDeviceDescription(SE_StreamExecutor* executor,
173                                          SE_DeviceDescription* description,
174                                          TF_Status* status);
175
176 SE_DeviceOptions* TpuExecutor_NewDeviceOptions(unsigned flags);
177 void TpuExecutor_FreeDeviceOptions(SE_DeviceOptions* options);
178
// NOTE(review): `ctx` is presumably passed back to `callback_fn` when it is
// invoked; SE_StatusCallbackFn is declared outside this header -- confirm.
179 bool TpuExecutor_HostCallback(SE_StreamExecutor* executor, SE_Stream* stream,
180                               SE_StatusCallbackFn callback_fn, void* ctx);
181 
// TpuTransferManager_* entry points. TpuTransferManager_New returns an
// XLA_TransferManager*; most other functions in this group take one as their
// first argument (TpuTransferManager_GetInfeedLayout,
// TpuTransferManager_FreeBuffers and TpuTransferManager_ReadDynamicShapes do
// not).
// NOTE(review): presumably TpuTransferManager_Free releases a manager obtained
// from TpuTransferManager_New -- confirm.
182 XLA_TransferManager* TpuTransferManager_New();
183 void TpuTransferManager_Free(XLA_TransferManager* manager);
184 SE_PlatformId TpuTransferManager_PlatformId(XLA_TransferManager* manager);
185 void TpuTransferManager_HostShapeToDeviceShape(XLA_TransferManager* manager,
186                                                XLA_Shape* host_shape,
187                                                XLA_Shape* device_shape);
188 void TpuTransferManager_TransferLiteralToDeviceAsync(
189     XLA_TransferManager* manager, SE_Stream* stream, XLA_Literal* literal,
190     XLA_ShapedBuffer* device_buffer, TF_Status* status);
// Takes a callback plus `ctx` instead of a TF_Status*.
// NOTE(review): presumably the result is reported through `callback` with
// `ctx` passed back to it -- confirm.
191 void TpuTransferManager_TransferLiteralFromDevice(
192     XLA_TransferManager* manager, SE_Stream* stream,
193     XLA_ShapedBuffer* device_buffer, XLA_Literal* literal,
194     XLA_StatusCallbackFn callback, void* ctx);
195 int64_t TpuTransferManager_GetByteSizeRequirement(XLA_TransferManager* manager,
196                                                   XLA_Shape* shape);
197 void TpuTransferManager_ChooseCompactLayoutForShape(
198     XLA_TransferManager* manager, XLA_Shape* host_shape, XLA_Shape* output,
199     TF_Status* status);
200 bool TpuTransferManager_CanShapedBufferBeAccessedNow(
201     XLA_TransferManager* manager, SE_StreamExecutor* executor,
202     XLA_ShapedBuffer* device_buffer);
203 bool TpuTransferManager_CanBufferBeAccessedNow(
204     XLA_TransferManager* manager, SE_StreamExecutor* executor,
205     SE_DeviceMemoryBase* device_buffer);
// NOTE(review): `elements_len` is presumably the number of entries in
// `elements` -- confirm.
206 void TpuTransferManager_WriteSingleTupleIndexTable(
207     XLA_TransferManager* manager, SE_Stream* stream,
208     SE_DeviceMemoryBase* elements, size_t elements_len, XLA_Shape* shape,
209     SE_DeviceMemoryBase* region, TF_Status* status);
210 void TpuTransferManager_GetInfeedLayout(XLA_Shape* shape,
211                                         XLA_Shape* infeed_shape);
// LinearizeToBuffers takes pointers to `buffers_array`, `buffers_size` and
// `buffers_array_size`; FreeBuffers takes the same three items one level of
// indirection lower.
// NOTE(review): presumably LinearizeToBuffers allocates those outputs and
// FreeBuffers releases them -- confirm.
212 void TpuTransferManager_LinearizeToBuffers(
213     XLA_TransferManager* manager, XLA_Literal* c_literal, char*** buffers_array,
214     int64_t** buffers_size, int64_t* buffers_array_size, TF_Status* status);
215 void TpuTransferManager_FreeBuffers(char** buffers_array, int64_t* buffers_size,
216                                     int64_t buffers_array_size);
217 void TpuTransferManager_TransferLiteralToInfeed(XLA_TransferManager* manager,
218                                                 SE_StreamExecutor* executor,
219                                                 XLA_Literal* c_literal,
220                                                 TF_Status* status);
// NOTE(review): judging by the parameter name, `buffers_size_in_uint32` is
// presumably counted in uint32_t elements rather than bytes -- confirm.
221 void TpuTransferManager_TransferBuffersToInfeed(XLA_TransferManager* manager,
222                                                 SE_StreamExecutor* executor,
223                                                 uint32_t** buffers_array,
224                                                 int64_t* buffers_size_in_uint32,
225                                                 int64_t buffers_array_size,
226                                                 TF_Status* status);
227 void TpuTransferManager_TransferLiteralFromOutfeed(
228     XLA_TransferManager* manager, SE_StreamExecutor* executor,
229     XLA_Shape* shape /*deprecated*/, XLA_Literal* c_literal, TF_Status* status);
230 void TpuTransferManager_ResetDevices(XLA_TransferManager* manager,
231                                      SE_StreamExecutor** executors,
232                                      int64_t num_executors, TF_Status* status);
// NOTE(review): `original_shape` is a C++ reference (const XLA_Shape&), unlike
// the XLA_Shape* parameters used elsewhere in this group. References are not
// valid C, so this declaration can only be consumed from C++ even though it
// sits inside the extern "C" block.
233 void TpuTransferManager_ReadDynamicShapes(SE_Stream* stream,
234                                           XLA_ShapedBuffer* buffer,
235                                           const XLA_Shape& original_shape,
236                                           XLA_Shape* updated_shape,
237                                           TF_Status* status);
238 
// TpuComputationPlacer_* entry points.
// NOTE(review): presumably TpuComputationPlacer_Free releases a placer
// obtained from TpuComputationPlacer_New -- confirm.
239 XLA_ComputationPlacer* TpuComputationPlacer_New();
240 void TpuComputationPlacer_Free(XLA_ComputationPlacer* placer);
241 // `assignment` should be a preallocated array of size `replica_count` *
242 // `computation_count`. The assignment will be constructed as a 2D array where
243 // assignment[replica][computation] = device_id.
244 void TpuComputationPlacer_AssignDevices(XLA_ComputationPlacer* placer,
245                                         int replica_count,
246                                         int computation_count, int* assignment,
247                                         TF_Status* status);
// Same parameter list as TpuComputationPlacer_AssignDevices except that the
// first argument is an SE_TpuTopology_Host* instead of a placer.
// NOTE(review): presumably `assignment` follows the same preallocation and
// layout rules as above -- confirm.
248 void TpuComputationPlacer_AssignLocalDevices(SE_TpuTopology_Host* host,
249                                              int replica_count,
250                                              int computation_count,
251                                              int* assignment,
252                                              TF_Status* status);
253 
// TpuTopology_*, TpuCoreLocation_* and TpuHostLocation_* query entry points.
// They take an SE_TpuTopology*, SE_TpuTopology_Core* or SE_TpuTopology_Host*
// respectively; none of them takes a TF_Status*.
// NOTE(review): how invalid arguments (e.g. out-of-range ids or coordinates)
// are reported is not visible in this header -- confirm.
254 int TpuTopology_LogicalDevicesPerHost(SE_TpuTopology* tpu_topology,
255                                       TpuCoreTypeEnum tpu_core_type);
256 int TpuTopology_LogicalDevicesPerChip(SE_TpuTopology* tpu_topology,
257                                       TpuCoreTypeEnum tpu_core_type);
258 int TpuTopology_HostCount(SE_TpuTopology* tpu_topology);
259 int TpuTopology_ChipsPerHost(SE_TpuTopology* tpu_topology);
260
261 int TpuTopology_ChipBounds_X(SE_TpuTopology* tpu_topology);
262 int TpuTopology_ChipBounds_Y(SE_TpuTopology* tpu_topology);
263 int TpuTopology_ChipBounds_Z(SE_TpuTopology* tpu_topology);
264 bool TpuTopology_HasChip(SE_TpuTopology* tpu_topology, int x, int y, int z);
265 SE_TpuTopology_Core* TpuTopology_CoreForId(SE_TpuTopology* tpu_topology,
266                                            TpuCoreTypeEnum tpu_core_type,
267                                            int id);
268 SE_TpuTopology_Core* TpuTopology_Core(SE_TpuTopology* tpu_topology,
269                                       TpuCoreTypeEnum tpu_core_type, int x,
270                                       int y, int z, int index);
271 int TpuTopology_NumCores(SE_TpuTopology* tpu_topology,
272                          TpuCoreTypeEnum tpu_core_type);
273 // 'cores' should be a preallocated array of size TpuTopology_NumCores.
274 void TpuTopology_Cores(SE_TpuTopology* tpu_topology,
275                        TpuCoreTypeEnum tpu_core_type,
276                        SE_TpuTopology_Core** cores);
277 int TpuTopology_IdForHost(SE_TpuTopology* tpu_topology, int x, int y, int z);
278 TpuVersionEnum TpuTopology_Version(SE_TpuTopology* tpu_topology);
// NOTE(review): `x`, `y` and `z` are non-const int pointers, presumably
// output parameters receiving the coordinates -- confirm.
279 void TpuCoreLocation_ChipCoordinates(SE_TpuTopology_Core* tpu_core_location,
280                                      int* x, int* y, int* z);
281 void TpuCoreLocation_HostCoordinates(SE_TpuTopology_Core* tpu_core_location,
282                                      int* x, int* y, int* z);
283 int TpuCoreLocation_Index(SE_TpuTopology_Core* tpu_core_location);
284 int TpuCoreLocation_Id(SE_TpuTopology_Core* tpu_core_location);
285
286 int TpuHostLocation_Id(SE_TpuTopology_Host* tpu_host_location);
287 int TpuHostLocation_NumCores(SE_TpuTopology_Host* tpu_host_location,
288                              TpuCoreTypeEnum tpu_core_type);
289 // 'cores' should be a preallocated array of size TpuHostLocation_NumCores.
290 void TpuHostLocation_Cores(SE_TpuTopology_Host* tpu_host_location,
291                            TpuCoreTypeEnum tpu_core_type,
292                            SE_TpuTopology_Core** cores);
293 
294 // C API for XLA::Compiler interface
// Unlike the declarations earlier in this header, every declaration from here
// to the function table is marked TFTPU_CAPI_EXPORT.
// NOTE(review): TFTPU_CAPI_EXPORT is not defined in this file (presumably a
// symbol-visibility macro from libtftpu.h) -- confirm.
295
296 TFTPU_CAPI_EXPORT Tpu_Compiler* TpuCompiler_New();
297 TFTPU_CAPI_EXPORT void TpuCompiler_Free(Tpu_Compiler* compiler);
298
299 TFTPU_CAPI_EXPORT void TpuCompiler_RunHloPasses(
300     Tpu_Compiler* compiler, XLA_HloModule* se_hlo_module,
301     SE_StreamExecutor* stream_executor, SE_DeviceMemoryAllocator* allocator,
302     XLA_HloModule* result, TF_Status* status);
303
304 TFTPU_CAPI_EXPORT void TpuCompiler_RunBackend(
305     Tpu_Compiler* compiler, XLA_HloModule* se_hlo_module,
306     SE_StreamExecutor* stream_executor, SE_DeviceMemoryAllocator* allocator,
307     SE_Executable** result, TF_Status* status);
308
// NOTE(review): `num_lists` is presumably the number of entries in
// `stream_exec_lists`; the required size of `executables` is not stated here
// -- confirm.
309 TFTPU_CAPI_EXPORT void TpuCompiler_Compile(
310     Tpu_Compiler* compiler, XLA_HloModuleGroup* se_hlo_module_group,
311     SE_StreamExecutorList* stream_exec_lists, int num_lists,
312     SE_DeviceMemoryAllocator* allocator, SE_Executable** executables,
313     TF_Status* status);
314
315 TFTPU_CAPI_EXPORT int64_t TpuCompiler_ShapeSize(Tpu_Compiler* compiler,
316                                                 XLA_Shape* c_shape);
317
// NOTE(review): `se_arguments_size` is presumably the number of entries in
// `se_arguments` -- confirm.
318 TFTPU_CAPI_EXPORT void TpuExecutable_ExecuteAsyncOnStream(
319     SE_Executable* executable, SE_ExecutableRunOptions* se_options,
320     SE_ExecutionInput** se_arguments, int se_arguments_size,
321     SE_HloExecutionProfile* hlo_execution_profile,
322     SE_ExecutionOutput* se_output, TF_Status* status);
323
324 // This frees the XLA_ShapeIndex* array allocated when se_output is returned by
325 // TpuExecutable_ExecuteAsyncOnStream.
326 TFTPU_CAPI_EXPORT void TpuExecutable_FreeXlaShapeIndexArray(
327     XLA_ShapeIndex* array);
328
329 // This frees the SE_MaybeOwningDeviceMemory* array allocated when se_output is
330 // returned by TpuExecutable_ExecuteAsyncOnStream.
331 // Note that this only frees the heap-allocated array itself, and does not
332 // free any of the underlying device memory.
333 TFTPU_CAPI_EXPORT void TpuExecutable_FreeMaybeOwningDeviceMemoryArray(
334     SE_MaybeOwningDeviceMemory* array);
335
// NOTE(review): `fingerprint` and `size` are presumably output parameters;
// who owns the returned bytes is not stated here -- confirm.
336 TFTPU_CAPI_EXPORT void TpuExecutable_Fingerprint(SE_Executable* executable,
337                                                  const char** fingerprint,
338                                                  size_t* size);
339
340 // The serialization format is not guaranteed to be stable over time and has no
341 // compatibility guarantees (i.e. this is not a suitable long-term storage
342 // format). TpuExecutableSerialize_FreeHandle should be called after 'handle' is
343 // no longer needed. 'handle' is set to nullptr on error.
344 TFTPU_CAPI_EXPORT void TpuExecutable_Serialize(
345     SE_Executable* executable, SE_ExecutableSerializationHandle** handle,
346     TF_Status* status);
347
348 // Returns the size of the serialized executable in bytes, i.e. the size of the
349 // array that should be passed to TpuExecutableSerialize_WriteToArray. `handle`
350 // must be non-null.
351 TFTPU_CAPI_EXPORT size_t
352 TpuExecutableSerialize_GetByteSize(SE_ExecutableSerializationHandle* handle);
353
354 // Writes the serialized executable to `serialized`, which must be of size
355 // `serialized_size`. `serialized_size` must be at least
356 // `TpuExecutableSerialize_GetByteSize(handle)`. `handle` must be non-null.
// NOTE(review): `serialized_size` is declared as int while
// TpuExecutableSerialize_GetByteSize returns size_t, so callers have to
// narrow the size; executables larger than INT_MAX bytes cannot be expressed.
357 TFTPU_CAPI_EXPORT void TpuExecutableSerialize_WriteToArray(
358     SE_ExecutableSerializationHandle* handle, int serialized_size,
359     uint8_t* serialized, TF_Status* status);
360
361 // Safe to call if 'handle' is null.
362 TFTPU_CAPI_EXPORT void TpuExecutableSerialize_FreeHandle(
363     SE_ExecutableSerializationHandle* handle);
364
// Takes the serialized bytes as (`serialized_size`, `serialized`) with the
// same int size type as TpuExecutableSerialize_WriteToArray.
// NOTE(review): presumably `*executable` receives the result and is released
// with TpuExecutable_Free -- confirm.
365 TFTPU_CAPI_EXPORT void TpuExecutable_Deserialize(int serialized_size,
366                                                  const uint8_t* serialized,
367                                                  SE_Executable** executable,
368                                                  TF_Status* status);
369
370 // Caller is responsible for freeing the returned module's proto and its
371 // config's proto.
372 TFTPU_CAPI_EXPORT XLA_HloModule
373 TpuExecutable_HloModule(SE_Executable* executable);
374
375 TFTPU_CAPI_EXPORT void TpuExecutable_Free(SE_Executable*);
376
377 // Converts an XLA `Shape` into its equivalent TPU `Shape` representation.
378 TFTPU_CAPI_EXPORT void XlaShapeToTpuShapeRepresentation(
379     XLA_Shape* serialized_xla_shape, int data_type, bool use_fast_memory,
380     XLA_Shape* serialized_tpu_shape, TF_Status* status);
381
// NOTE(review): presumably `padded_shape` receives the result for
// `serialized_xla_shape` -- confirm.
382 TFTPU_CAPI_EXPORT void XlaShapeToTpuPaddedShape(XLA_Shape* serialized_xla_shape,
383                                                 XLA_Shape* padded_shape,
384                                                 TF_Status* status);
385 
// Function table for the executor C API: it contains one
// TFTPU_ADD_FN_IN_STRUCT entry for each function declared earlier in this
// header, listed in the same grouping as the declarations.
// NOTE(review): TFTPU_ADD_FN_IN_STRUCT is not defined in this file (presumably
// it comes from libtftpu.h and declares one function-pointer member per
// entry) -- confirm. If so, the order of entries determines the struct layout,
// so entries should not be reordered or removed without checking every
// consumer of this struct.
386 struct TfTpu_ExecutorApiFn {
  // TpuPlatform_* entries.
387   TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_New);
388   TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_Free);
389   TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_Initialize);
390   TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_Initialized);
391   TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_GetExecutor);
392   TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_Id);
393   TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_VisibleDeviceCount);
394   TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_TpuMemoryLimit);
395   TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_ShouldRegisterTpuDeviceToDeviceCopy);
396   TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_GetTopologyPtr);
397   TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_GetHostLocation);
398   TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_GetRuntimeVersion);
399
  // TpuExecutor_* entries.
400   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_Init);
401   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_Free);
402   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_PlatformDeviceCount);
403   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_Allocate);
404   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_Deallocate);
405   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_GetAllocatorStats);
406   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_DeviceMemoryUsage);
407   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_AllocateStream);
408   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_DeallocateStream);
409   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_CreateStreamDependency);
410   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_GetStatus);
411   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_GetCoreLocation);
412   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_AllocateEvent);
413   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_DeallocateEvent);
414   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_PollForEventStatus);
415   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_RecordEvent);
416   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_WaitForEvent);
417   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_AllocateTimer);
418   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_DeallocateTimer);
419   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_StartTimer);
420   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_StopTimer);
421   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_SynchronousMemcpyToHost);
422   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_SynchronousMemcpyFromHost);
423   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_MemcpyToHost);
424   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_MemcpyFromHost);
425   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_EnqueueInfeed);
426   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_DequeueOutfeed);
427   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_WaitForInfeedReady);
428   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_WaitForOutfeedReady);
429   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_BlockHostUntilDone);
430   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_BlockUntilDoneOrFailed);
431   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_SyncAndForgetFailedStreams);
432   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_SynchronizeAllActivity);
433   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_UnloadAllPrograms);
434   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_EnqueueCompactionOnStreamForHbm);
435
  // TpuStream_* entries.
436   TFTPU_ADD_FN_IN_STRUCT(TpuStream_New);
437   TFTPU_ADD_FN_IN_STRUCT(TpuStream_Free);
438   TFTPU_ADD_FN_IN_STRUCT(TpuStream_Stream);
439   TFTPU_ADD_FN_IN_STRUCT(TpuStream_Status);
440   TFTPU_ADD_FN_IN_STRUCT(TpuStream_IsSameSharedMemoryLocation);
441   TFTPU_ADD_FN_IN_STRUCT(TpuStream_EnqueueTransferHostToDevice);
442   TFTPU_ADD_FN_IN_STRUCT(TpuStream_EnqueueTransferDeviceToHost);
443   TFTPU_ADD_FN_IN_STRUCT(TpuStream_TpuEnqueueOnDeviceSendRecvLocal);
444
  // TpuEvent_* entries.
445   TFTPU_ADD_FN_IN_STRUCT(TpuEvent_New);
446   TFTPU_ADD_FN_IN_STRUCT(TpuEvent_Free);
447
  // TpuTimer_* entries.
448   TFTPU_ADD_FN_IN_STRUCT(TpuTimer_New);
449   TFTPU_ADD_FN_IN_STRUCT(TpuTimer_Free);
450   TFTPU_ADD_FN_IN_STRUCT(TpuTimer_Nanoseconds);
451   TFTPU_ADD_FN_IN_STRUCT(TpuTimer_Microseconds);
452
  // TpuStatus_* entries.
453   TFTPU_ADD_FN_IN_STRUCT(TpuStatus_New);
454   TFTPU_ADD_FN_IN_STRUCT(TpuStatus_Create);
455   TFTPU_ADD_FN_IN_STRUCT(TpuStatus_Set);
456   TFTPU_ADD_FN_IN_STRUCT(TpuStatus_Free);
457   TFTPU_ADD_FN_IN_STRUCT(TpuStatus_Message);
458   TFTPU_ADD_FN_IN_STRUCT(TpuStatus_Code);
459   TFTPU_ADD_FN_IN_STRUCT(TpuStatus_Ok);
460
  // TpuStreamExecutorConfig_* entries.
461   TFTPU_ADD_FN_IN_STRUCT(TpuStreamExecutorConfig_Default);
462   TFTPU_ADD_FN_IN_STRUCT(TpuStreamExecutorConfig_SetOrdinal);
463   TFTPU_ADD_FN_IN_STRUCT(TpuStreamExecutorConfig_Free);
464
  // TpuDeviceDescription_* entries.
465   TFTPU_ADD_FN_IN_STRUCT(TpuDeviceDescription_New);
466   TFTPU_ADD_FN_IN_STRUCT(TpuDeviceDescription_Free);
467
  // Remaining TpuExecutor_* entries (device description, device options,
  // host callback).
468   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_CreateDeviceDescription);
469   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_NewDeviceOptions);
470   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_FreeDeviceOptions);
471   TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_HostCallback);
472
  // TpuTransferManager_* entries.
473   TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_New);
474   TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_Free);
475   TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_PlatformId);
476   TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_HostShapeToDeviceShape);
477   TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_TransferLiteralToDeviceAsync);
478   TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_TransferLiteralFromDevice);
479   TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_GetByteSizeRequirement);
480   TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_ChooseCompactLayoutForShape);
481   TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_CanShapedBufferBeAccessedNow);
482   TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_CanBufferBeAccessedNow);
483   TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_WriteSingleTupleIndexTable);
484   TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_GetInfeedLayout);
485   TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_LinearizeToBuffers);
486   TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_FreeBuffers);
487   TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_TransferLiteralToInfeed);
488   TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_TransferBuffersToInfeed);
489   TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_TransferLiteralFromOutfeed);
490   TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_ResetDevices);
491   TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_ReadDynamicShapes);
492
  // TpuComputationPlacer_* entries.
493   TFTPU_ADD_FN_IN_STRUCT(TpuComputationPlacer_New);
494   TFTPU_ADD_FN_IN_STRUCT(TpuComputationPlacer_Free);
495   TFTPU_ADD_FN_IN_STRUCT(TpuComputationPlacer_AssignDevices);
496   TFTPU_ADD_FN_IN_STRUCT(TpuComputationPlacer_AssignLocalDevices);
497
  // TpuTopology_* entries.
498   TFTPU_ADD_FN_IN_STRUCT(TpuTopology_LogicalDevicesPerHost);
499   TFTPU_ADD_FN_IN_STRUCT(TpuTopology_LogicalDevicesPerChip);
500   TFTPU_ADD_FN_IN_STRUCT(TpuTopology_HostCount);
501   TFTPU_ADD_FN_IN_STRUCT(TpuTopology_ChipsPerHost);
502
503   TFTPU_ADD_FN_IN_STRUCT(TpuTopology_ChipBounds_X);
504   TFTPU_ADD_FN_IN_STRUCT(TpuTopology_ChipBounds_Y);
505   TFTPU_ADD_FN_IN_STRUCT(TpuTopology_ChipBounds_Z);
506   TFTPU_ADD_FN_IN_STRUCT(TpuTopology_HasChip);
507   TFTPU_ADD_FN_IN_STRUCT(TpuTopology_CoreForId);
508   TFTPU_ADD_FN_IN_STRUCT(TpuTopology_Core);
509   TFTPU_ADD_FN_IN_STRUCT(TpuTopology_NumCores);
510   TFTPU_ADD_FN_IN_STRUCT(TpuTopology_Cores);
511   TFTPU_ADD_FN_IN_STRUCT(TpuTopology_IdForHost);
512   TFTPU_ADD_FN_IN_STRUCT(TpuTopology_Version);
513
  // TpuCoreLocation_* entries.
514   TFTPU_ADD_FN_IN_STRUCT(TpuCoreLocation_ChipCoordinates);
515   TFTPU_ADD_FN_IN_STRUCT(TpuCoreLocation_HostCoordinates);
516   TFTPU_ADD_FN_IN_STRUCT(TpuCoreLocation_Index);
517   TFTPU_ADD_FN_IN_STRUCT(TpuCoreLocation_Id);
518
  // TpuHostLocation_* entries.
519   TFTPU_ADD_FN_IN_STRUCT(TpuHostLocation_Id);
520   TFTPU_ADD_FN_IN_STRUCT(TpuHostLocation_NumCores);
521   TFTPU_ADD_FN_IN_STRUCT(TpuHostLocation_Cores);
522
  // TpuCompiler_* and TpuExecutable* entries.
523   TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_New);
524   TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_Free);
525   TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_RunHloPasses);
526   TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_RunBackend);
527   TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_Compile);
528   TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_ShapeSize);
529   TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_ExecuteAsyncOnStream);
530   TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_FreeXlaShapeIndexArray);
531   TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_FreeMaybeOwningDeviceMemoryArray);
532   TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_Fingerprint);
533   TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_Serialize);
534   TFTPU_ADD_FN_IN_STRUCT(TpuExecutableSerialize_GetByteSize);
535   TFTPU_ADD_FN_IN_STRUCT(TpuExecutableSerialize_WriteToArray);
536   TFTPU_ADD_FN_IN_STRUCT(TpuExecutableSerialize_FreeHandle);
537   TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_Deserialize);
538   TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_HloModule);
539   TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_Free);
540
  // Shape conversion entries.
541   TFTPU_ADD_FN_IN_STRUCT(XlaShapeToTpuShapeRepresentation);
542   TFTPU_ADD_FN_IN_STRUCT(XlaShapeToTpuPaddedShape);
543 };
544 }
545 
546 // extern "C"
547 
548 #endif  // TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_EXECUTOR_C_API_H_
549