1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_EXECUTOR_C_API_H_ 17 #define TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_EXECUTOR_C_API_H_ 18 19 #include <stddef.h> 20 #include <stdint.h> 21 22 #include "tensorflow/c/tf_attrtype.h" 23 #include "tensorflow/core/tpu/libtftpu.h" 24 #include "tensorflow/stream_executor/tpu/c_api_decl.h" 25 26 extern "C" { 27 28 SE_Platform* TpuPlatform_New(); 29 void TpuPlatform_Free(SE_Platform* platform); 30 void TpuPlatform_Initialize(SE_Platform* platform, size_t options_size, 31 const char** options_key, 32 const char** options_value, TF_Status* status); 33 bool TpuPlatform_Initialized(SE_Platform* platform); 34 SE_StreamExecutor* TpuPlatform_GetExecutor(SE_Platform* platform, 35 SE_StreamExecutorConfig* config, 36 TF_Status* status); 37 SE_PlatformId TpuPlatform_Id(SE_Platform* platform); 38 int64_t TpuPlatform_VisibleDeviceCount(SE_Platform* platform); 39 int64_t TpuPlatform_TpuMemoryLimit(SE_Platform* platform); 40 bool TpuPlatform_ShouldRegisterTpuDeviceToDeviceCopy(SE_Platform* platform); 41 SE_TpuTopology* TpuPlatform_GetTopologyPtr(SE_Platform* platform); 42 SE_TpuTopology_Host* TpuPlatform_GetHostLocation(SE_Platform* platform); 43 TpuRuntimeVersion TpuPlatform_GetRuntimeVersion(SE_Platform* platform); 44 45 void TpuExecutor_Init(SE_StreamExecutor* executor, int device_ordinal, 46 SE_DeviceOptions* device_options, TF_Status* status); 47 void TpuExecutor_Free(SE_StreamExecutor* executor); 48 49 int TpuExecutor_PlatformDeviceCount(SE_StreamExecutor* executor); 50 51 SE_DeviceMemoryBase TpuExecutor_Allocate(SE_StreamExecutor* executor, 52 uint64_t size, int64_t memory_space); 53 void TpuExecutor_Deallocate(SE_StreamExecutor* executor, 54 SE_DeviceMemoryBase* memory); 55 bool TpuExecutor_GetAllocatorStats(SE_StreamExecutor* executor, 56 SE_AllocatorStats* stats); 57 bool TpuExecutor_DeviceMemoryUsage(SE_StreamExecutor* executor, int64_t* free, 58 int64_t* total); 59 60 bool TpuExecutor_AllocateStream(SE_StreamExecutor* executor, SE_Stream* stream); 61 void TpuExecutor_DeallocateStream(SE_StreamExecutor* executor, 62 SE_Stream* stream); 63 bool TpuExecutor_CreateStreamDependency(SE_StreamExecutor* executor, 64 SE_Stream* dependent, SE_Stream* other); 65 void TpuExecutor_GetStatus(SE_StreamExecutor* executor, SE_Stream* stream, 66 TF_Status* status); 67 68 SE_TpuTopology_Core* TpuExecutor_GetCoreLocation(SE_StreamExecutor* executor); 69 70 void TpuExecutor_AllocateEvent(SE_StreamExecutor* executor, SE_Event* event, 71 TF_Status* status); 72 void TpuExecutor_DeallocateEvent(SE_StreamExecutor* executor, SE_Event* event, 73 TF_Status* status); 74 int TpuExecutor_PollForEventStatus(SE_StreamExecutor* executor, 75 SE_Event* event); 76 void TpuExecutor_RecordEvent(SE_StreamExecutor* executor, SE_Stream* stream, 77 SE_Event* event, TF_Status* status); 78 void TpuExecutor_WaitForEvent(SE_StreamExecutor* executor, SE_Stream* stream, 79 SE_Event* event, TF_Status* status); 80 81 bool TpuExecutor_AllocateTimer(SE_StreamExecutor* executor, SE_Timer* timer); 82 void TpuExecutor_DeallocateTimer(SE_StreamExecutor* executor, SE_Timer* timer); 83 bool TpuExecutor_StartTimer(SE_StreamExecutor* executor, SE_Stream* stream, 84 SE_Timer* timer); 85 bool TpuExecutor_StopTimer(SE_StreamExecutor* executor, SE_Stream* stream, 86 SE_Timer* timer); 87 88 void TpuExecutor_SynchronousMemcpyToHost(SE_StreamExecutor* executor, 89 void* host_dst, 90 const SE_DeviceMemoryBase* device_src, 91 uint64_t size, TF_Status* status); 92 void TpuExecutor_SynchronousMemcpyFromHost(SE_StreamExecutor* executor, 93 SE_DeviceMemoryBase* device_dst, 94 const void* host_src, uint64_t size, 95 TF_Status* status); 96 bool TpuExecutor_MemcpyToHost(SE_StreamExecutor* executor, SE_Stream* stream, 97 void* host_dst, 98 const SE_DeviceMemoryBase* device_src, 99 uint64_t size); 100 101 bool TpuExecutor_MemcpyFromHost(SE_StreamExecutor* executor, SE_Stream* stream, 102 SE_DeviceMemoryBase* device_dst, 103 const void* host_src, uint64_t size); 104 105 void TpuExecutor_EnqueueInfeed(SE_StreamExecutor* executor, 106 int32_t infeed_queue_index, const uint8_t* data, 107 int64_t size, TF_Status* status); 108 void TpuExecutor_DequeueOutfeed(SE_StreamExecutor* executor, 109 int32_t outfeed_queue_index, uint8_t* data, 110 int64_t size, TF_Status* status); 111 void TpuExecutor_WaitForInfeedReady(SE_StreamExecutor* executor, 112 int32_t infeed_queue_index, 113 TF_Status* status); 114 void TpuExecutor_WaitForOutfeedReady(SE_StreamExecutor* executor, 115 int32_t outfeed_queue_index, 116 TF_Status* status); 117 118 void TpuExecutor_BlockHostUntilDone(SE_StreamExecutor* executor, 119 SE_Stream* stream, TF_Status* status); 120 void TpuExecutor_BlockUntilDoneOrFailed(SE_StreamExecutor* executor, 121 TF_Status* status); 122 void TpuExecutor_SyncAndForgetFailedStreams(SE_StreamExecutor* executor); 123 bool TpuExecutor_SynchronizeAllActivity(SE_StreamExecutor* executor); 124 125 void TpuExecutor_UnloadAllPrograms(SE_StreamExecutor* executor, 126 TF_Status* status); 127 void TpuExecutor_EnqueueCompactionOnStreamForHbm(SE_StreamExecutor* executor, 128 SE_Stream* compaction_stream, 129 TF_Status* status); 130 131 SE_Stream* TpuStream_New(SE_StreamExecutor* parent); 132 void TpuStream_Free(SE_Stream*); 133 void* TpuStream_Stream(SE_Stream*); 134 bool TpuStream_Status(SE_Stream*); 135 bool TpuStream_IsSameSharedMemoryLocation(SE_Stream*, SE_Stream*); 136 void TpuStream_EnqueueTransferHostToDevice(SE_Stream* stream, 137 SE_DeviceMemoryBase device_dst, 138 void* host_src, uint64_t size, 139 TF_Status* status); 140 void TpuStream_EnqueueTransferDeviceToHost(SE_Stream* stream, 141 SE_DeviceMemoryBase device_src, 142 void* host_dst, uint64_t size, 143 TF_Status* status); 144 void TpuStream_TpuEnqueueOnDeviceSendRecvLocal(SE_Stream* stream, 145 SE_DeviceMemoryBase send_buffer, 146 SE_DeviceMemoryBase recv_buffer, 147 TF_Status* status); 148 149 SE_Event* TpuEvent_New(SE_StreamExecutor* parent); 150 void TpuEvent_Free(SE_Event*); 151 152 SE_Timer* TpuTimer_New(SE_StreamExecutor* parent); 153 void TpuTimer_Free(SE_Timer*); 154 int64_t TpuTimer_Nanoseconds(SE_Timer*); 155 int64_t TpuTimer_Microseconds(SE_Timer*); 156 157 TF_Status* TpuStatus_New(); 158 TF_Status* TpuStatus_Create(int32_t code, const char* msg); 159 void TpuStatus_Set(TF_Status* status, int32_t code, const char* msg, 160 int32_t len); 161 void TpuStatus_Free(TF_Status* status); 162 const char* TpuStatus_Message(TF_Status* status); 163 int TpuStatus_Code(TF_Status* status); 164 bool TpuStatus_Ok(TF_Status* status); 165 166 SE_StreamExecutorConfig* TpuStreamExecutorConfig_Default(); 167 void TpuStreamExecutorConfig_SetOrdinal(SE_StreamExecutorConfig*, int ordinal); 168 void TpuStreamExecutorConfig_Free(SE_StreamExecutorConfig*); 169 170 SE_DeviceDescription* TpuDeviceDescription_New(); 171 void TpuDeviceDescription_Free(SE_DeviceDescription* description); 172 void TpuExecutor_CreateDeviceDescription(SE_StreamExecutor* executor, 173 SE_DeviceDescription* description, 174 TF_Status* status); 175 176 SE_DeviceOptions* TpuExecutor_NewDeviceOptions(unsigned flags); 177 void TpuExecutor_FreeDeviceOptions(SE_DeviceOptions* options); 178 179 bool TpuExecutor_HostCallback(SE_StreamExecutor* executor, SE_Stream* stream, 180 SE_StatusCallbackFn callback_fn, void* ctx); 181 182 XLA_TransferManager* TpuTransferManager_New(); 183 void TpuTransferManager_Free(XLA_TransferManager* manager); 184 SE_PlatformId TpuTransferManager_PlatformId(XLA_TransferManager* manager); 185 void TpuTransferManager_HostShapeToDeviceShape(XLA_TransferManager* manager, 186 XLA_Shape* host_shape, 187 XLA_Shape* device_shape); 188 void TpuTransferManager_TransferLiteralToDeviceAsync( 189 XLA_TransferManager* manager, SE_Stream* stream, XLA_Literal* literal, 190 XLA_ShapedBuffer* device_buffer, TF_Status* status); 191 void TpuTransferManager_TransferLiteralFromDevice( 192 XLA_TransferManager* manager, SE_Stream* stream, 193 XLA_ShapedBuffer* device_buffer, XLA_Literal* literal, 194 XLA_StatusCallbackFn callback, void* ctx); 195 int64_t TpuTransferManager_GetByteSizeRequirement(XLA_TransferManager* manager, 196 XLA_Shape* shape); 197 void TpuTransferManager_ChooseCompactLayoutForShape( 198 XLA_TransferManager* manager, XLA_Shape* host_shape, XLA_Shape* output, 199 TF_Status* status); 200 bool TpuTransferManager_CanShapedBufferBeAccessedNow( 201 XLA_TransferManager* manager, SE_StreamExecutor* executor, 202 XLA_ShapedBuffer* device_buffer); 203 bool TpuTransferManager_CanBufferBeAccessedNow( 204 XLA_TransferManager* manager, SE_StreamExecutor* executor, 205 SE_DeviceMemoryBase* device_buffer); 206 void TpuTransferManager_WriteSingleTupleIndexTable( 207 XLA_TransferManager* manager, SE_Stream* stream, 208 SE_DeviceMemoryBase* elements, size_t elements_len, XLA_Shape* shape, 209 SE_DeviceMemoryBase* region, TF_Status* status); 210 void TpuTransferManager_GetInfeedLayout(XLA_Shape* shape, 211 XLA_Shape* infeed_shape); 212 void TpuTransferManager_LinearizeToBuffers( 213 XLA_TransferManager* manager, XLA_Literal* c_literal, char*** buffers_array, 214 int64_t** buffers_size, int64_t* buffers_array_size, TF_Status* status); 215 void TpuTransferManager_FreeBuffers(char** buffers_array, int64_t* buffers_size, 216 int64_t buffers_array_size); 217 void TpuTransferManager_TransferLiteralToInfeed(XLA_TransferManager* manager, 218 SE_StreamExecutor* executor, 219 XLA_Literal* c_literal, 220 TF_Status* status); 221 void TpuTransferManager_TransferBuffersToInfeed(XLA_TransferManager* manager, 222 SE_StreamExecutor* executor, 223 uint32_t** buffers_array, 224 int64_t* buffers_size_in_uint32, 225 int64_t buffers_array_size, 226 TF_Status* status); 227 void TpuTransferManager_TransferLiteralFromOutfeed( 228 XLA_TransferManager* manager, SE_StreamExecutor* executor, 229 XLA_Shape* shape /*deprecated*/, XLA_Literal* c_literal, TF_Status* status); 230 void TpuTransferManager_ResetDevices(XLA_TransferManager* manager, 231 SE_StreamExecutor** executors, 232 int64_t num_executors, TF_Status* status); 233 void TpuTransferManager_ReadDynamicShapes(SE_Stream* stream, 234 XLA_ShapedBuffer* buffer, 235 const XLA_Shape& original_shape, 236 XLA_Shape* updated_shape, 237 TF_Status* status); 238 239 XLA_ComputationPlacer* TpuComputationPlacer_New(); 240 void TpuComputationPlacer_Free(XLA_ComputationPlacer* placer); 241 // `assignment` should be a preallocated array of size `replicate_count` * 242 // `computation_count`. The assignment will be constructed as a 2D array where 243 // assignment[replica][computation] = device_id. 244 void TpuComputationPlacer_AssignDevices(XLA_ComputationPlacer* placer, 245 int replica_count, 246 int computation_count, int* assignment, 247 TF_Status* status); 248 void TpuComputationPlacer_AssignLocalDevices(SE_TpuTopology_Host* host, 249 int replica_count, 250 int computation_count, 251 int* assignment, 252 TF_Status* status); 253 254 int TpuTopology_LogicalDevicesPerHost(SE_TpuTopology* tpu_topology, 255 TpuCoreTypeEnum tpu_core_type); 256 int TpuTopology_LogicalDevicesPerChip(SE_TpuTopology* tpu_topology, 257 TpuCoreTypeEnum tpu_core_type); 258 int TpuTopology_HostCount(SE_TpuTopology* tpu_topology); 259 int TpuTopology_ChipsPerHost(SE_TpuTopology* tpu_topology); 260 261 int TpuTopology_ChipBounds_X(SE_TpuTopology* tpu_topology); 262 int TpuTopology_ChipBounds_Y(SE_TpuTopology* tpu_topology); 263 int TpuTopology_ChipBounds_Z(SE_TpuTopology* tpu_topology); 264 bool TpuTopology_HasChip(SE_TpuTopology* tpu_topology, int x, int y, int z); 265 SE_TpuTopology_Core* TpuTopology_CoreForId(SE_TpuTopology* tpu_topology, 266 TpuCoreTypeEnum tpu_core_type, 267 int id); 268 SE_TpuTopology_Core* TpuTopology_Core(SE_TpuTopology* tpu_topology, 269 TpuCoreTypeEnum tpu_core_type, int x, 270 int y, int z, int index); 271 int TpuTopology_NumCores(SE_TpuTopology* tpu_topology, 272 TpuCoreTypeEnum tpu_core_type); 273 // 'cores' should be a preallocated array of size TpuTopology_NumCores. 274 void TpuTopology_Cores(SE_TpuTopology* tpu_topology, 275 TpuCoreTypeEnum tpu_core_type, 276 SE_TpuTopology_Core** cores); 277 int TpuTopology_IdForHost(SE_TpuTopology* tpu_topology, int x, int y, int z); 278 TpuVersionEnum TpuTopology_Version(SE_TpuTopology* tpu_topology); 279 void TpuCoreLocation_ChipCoordinates(SE_TpuTopology_Core* tpu_core_location, 280 int* x, int* y, int* z); 281 void TpuCoreLocation_HostCoordinates(SE_TpuTopology_Core* tpu_core_location, 282 int* x, int* y, int* z); 283 int TpuCoreLocation_Index(SE_TpuTopology_Core* tpu_core_location); 284 int TpuCoreLocation_Id(SE_TpuTopology_Core* tpu_core_location); 285 286 int TpuHostLocation_Id(SE_TpuTopology_Host* tpu_host_location); 287 int TpuHostLocation_NumCores(SE_TpuTopology_Host* tpu_host_location, 288 TpuCoreTypeEnum tpu_core_type); 289 // 'cores' should be a preallocated array of size TpuHostLocation_NumCores. 290 void TpuHostLocation_Cores(SE_TpuTopology_Host* tpu_host_location, 291 TpuCoreTypeEnum tpu_core_type, 292 SE_TpuTopology_Core** cores); 293 294 // C API for XLA::Compiler interface 295 296 TFTPU_CAPI_EXPORT Tpu_Compiler* TpuCompiler_New(); 297 TFTPU_CAPI_EXPORT void TpuCompiler_Free(Tpu_Compiler* compiler); 298 299 TFTPU_CAPI_EXPORT void TpuCompiler_RunHloPasses( 300 Tpu_Compiler* compiler, XLA_HloModule* se_hlo_module, 301 SE_StreamExecutor* stream_executor, SE_DeviceMemoryAllocator* allocator, 302 XLA_HloModule* result, TF_Status* status); 303 304 TFTPU_CAPI_EXPORT void TpuCompiler_RunBackend( 305 Tpu_Compiler* compiler, XLA_HloModule* se_hlo_module, 306 SE_StreamExecutor* stream_executor, SE_DeviceMemoryAllocator* allocator, 307 SE_Executable** result, TF_Status* status); 308 309 TFTPU_CAPI_EXPORT void TpuCompiler_Compile( 310 Tpu_Compiler* compiler, XLA_HloModuleGroup* se_hlo_module_group, 311 SE_StreamExecutorList* stream_exec_lists, int num_lists, 312 SE_DeviceMemoryAllocator* allocator, SE_Executable** executables, 313 TF_Status* status); 314 315 TFTPU_CAPI_EXPORT int64_t TpuCompiler_ShapeSize(Tpu_Compiler* compiler, 316 XLA_Shape* c_shape); 317 318 TFTPU_CAPI_EXPORT void TpuExecutable_ExecuteAsyncOnStream( 319 SE_Executable* executable, SE_ExecutableRunOptions* se_options, 320 SE_ExecutionInput** se_arguments, int se_arguments_size, 321 SE_HloExecutionProfile* hlo_execution_profile, 322 SE_ExecutionOutput* se_output, TF_Status* status); 323 324 // This frees the XLA_ShapeIndex* array allocated when se_output is returned by 325 // TpuExecutable_ExecuteAsyncOnStream. 326 TFTPU_CAPI_EXPORT void TpuExecutable_FreeXlaShapeIndexArray( 327 XLA_ShapeIndex* array); 328 329 // This frees the SE_MaybeOwningDeviceMemory* array allocated when se_output is 330 // returned by TpuExecutable_ExecuteAsyncOnStream. 331 // Note that this only frees the heap-allocated array itself, and does not 332 // free any of the underlying device memory. 333 TFTPU_CAPI_EXPORT void TpuExecutable_FreeMaybeOwningDeviceMemoryArray( 334 SE_MaybeOwningDeviceMemory* array); 335 336 TFTPU_CAPI_EXPORT void TpuExecutable_Fingerprint(SE_Executable* executable, 337 const char** fingerprint, 338 size_t* size); 339 340 // The serialization format is not guaranteed to be stable over time and has no 341 // compatibility guarantees (i.e. this is not a suitable long-term storage 342 // format). TpuExecutableSerialize_FreeHandle should be called after 'handle' is 343 // no longer needed. 'handle' is set to nullptr on error. 344 TFTPU_CAPI_EXPORT void TpuExecutable_Serialize( 345 SE_Executable* executable, SE_ExecutableSerializationHandle** handle, 346 TF_Status* status); 347 348 // Returns the size of the serialized executable in bytes, i.e. the size of the 349 // array that should be passed to TpuExecutableSerialize_WriteToArray. `handle` 350 // must be non-null. 351 TFTPU_CAPI_EXPORT size_t 352 TpuExecutableSerialize_GetByteSize(SE_ExecutableSerializationHandle* handle); 353 354 // Writes the serialized executable to `serialized`, which must be of size 355 // `serialized_size`. `serialized_size` should must be at least 356 // `TpuExecutableSerialize_GetByteSize(handle)`. `handle` must be non-null. 357 TFTPU_CAPI_EXPORT void TpuExecutableSerialize_WriteToArray( 358 SE_ExecutableSerializationHandle* handle, int serialized_size, 359 uint8_t* serialized, TF_Status* status); 360 361 // Safe to call if 'handle' is null. 362 TFTPU_CAPI_EXPORT void TpuExecutableSerialize_FreeHandle( 363 SE_ExecutableSerializationHandle* handle); 364 365 TFTPU_CAPI_EXPORT void TpuExecutable_Deserialize(int serialized_size, 366 const uint8_t* serialized, 367 SE_Executable** executable, 368 TF_Status* status); 369 370 // Caller is responsible for freeing the returned module's proto and its 371 // config's proto. 372 TFTPU_CAPI_EXPORT XLA_HloModule 373 TpuExecutable_HloModule(SE_Executable* executable); 374 375 TFTPU_CAPI_EXPORT void TpuExecutable_Free(SE_Executable*); 376 377 // Converts an XLA `Shape` into its equivalent TPU `Shape` representation. 378 TFTPU_CAPI_EXPORT void XlaShapeToTpuShapeRepresentation( 379 XLA_Shape* serialized_xla_shape, int data_type, bool use_fast_memory, 380 XLA_Shape* serialized_tpu_shape, TF_Status* status); 381 382 TFTPU_CAPI_EXPORT void XlaShapeToTpuPaddedShape(XLA_Shape* serialized_xla_shape, 383 XLA_Shape* padded_shape, 384 TF_Status* status); 385 386 struct TfTpu_ExecutorApiFn { 387 TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_New); 388 TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_Free); 389 TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_Initialize); 390 TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_Initialized); 391 TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_GetExecutor); 392 TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_Id); 393 TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_VisibleDeviceCount); 394 TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_TpuMemoryLimit); 395 TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_ShouldRegisterTpuDeviceToDeviceCopy); 396 TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_GetTopologyPtr); 397 TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_GetHostLocation); 398 TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_GetRuntimeVersion); 399 400 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_Init); 401 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_Free); 402 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_PlatformDeviceCount); 403 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_Allocate); 404 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_Deallocate); 405 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_GetAllocatorStats); 406 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_DeviceMemoryUsage); 407 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_AllocateStream); 408 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_DeallocateStream); 409 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_CreateStreamDependency); 410 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_GetStatus); 411 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_GetCoreLocation); 412 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_AllocateEvent); 413 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_DeallocateEvent); 414 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_PollForEventStatus); 415 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_RecordEvent); 416 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_WaitForEvent); 417 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_AllocateTimer); 418 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_DeallocateTimer); 419 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_StartTimer); 420 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_StopTimer); 421 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_SynchronousMemcpyToHost); 422 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_SynchronousMemcpyFromHost); 423 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_MemcpyToHost); 424 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_MemcpyFromHost); 425 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_EnqueueInfeed); 426 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_DequeueOutfeed); 427 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_WaitForInfeedReady); 428 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_WaitForOutfeedReady); 429 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_BlockHostUntilDone); 430 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_BlockUntilDoneOrFailed); 431 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_SyncAndForgetFailedStreams); 432 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_SynchronizeAllActivity); 433 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_UnloadAllPrograms); 434 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_EnqueueCompactionOnStreamForHbm); 435 436 TFTPU_ADD_FN_IN_STRUCT(TpuStream_New); 437 TFTPU_ADD_FN_IN_STRUCT(TpuStream_Free); 438 TFTPU_ADD_FN_IN_STRUCT(TpuStream_Stream); 439 TFTPU_ADD_FN_IN_STRUCT(TpuStream_Status); 440 TFTPU_ADD_FN_IN_STRUCT(TpuStream_IsSameSharedMemoryLocation); 441 TFTPU_ADD_FN_IN_STRUCT(TpuStream_EnqueueTransferHostToDevice); 442 TFTPU_ADD_FN_IN_STRUCT(TpuStream_EnqueueTransferDeviceToHost); 443 TFTPU_ADD_FN_IN_STRUCT(TpuStream_TpuEnqueueOnDeviceSendRecvLocal); 444 445 TFTPU_ADD_FN_IN_STRUCT(TpuEvent_New); 446 TFTPU_ADD_FN_IN_STRUCT(TpuEvent_Free); 447 448 TFTPU_ADD_FN_IN_STRUCT(TpuTimer_New); 449 TFTPU_ADD_FN_IN_STRUCT(TpuTimer_Free); 450 TFTPU_ADD_FN_IN_STRUCT(TpuTimer_Nanoseconds); 451 TFTPU_ADD_FN_IN_STRUCT(TpuTimer_Microseconds); 452 453 TFTPU_ADD_FN_IN_STRUCT(TpuStatus_New); 454 TFTPU_ADD_FN_IN_STRUCT(TpuStatus_Create); 455 TFTPU_ADD_FN_IN_STRUCT(TpuStatus_Set); 456 TFTPU_ADD_FN_IN_STRUCT(TpuStatus_Free); 457 TFTPU_ADD_FN_IN_STRUCT(TpuStatus_Message); 458 TFTPU_ADD_FN_IN_STRUCT(TpuStatus_Code); 459 TFTPU_ADD_FN_IN_STRUCT(TpuStatus_Ok); 460 461 TFTPU_ADD_FN_IN_STRUCT(TpuStreamExecutorConfig_Default); 462 TFTPU_ADD_FN_IN_STRUCT(TpuStreamExecutorConfig_SetOrdinal); 463 TFTPU_ADD_FN_IN_STRUCT(TpuStreamExecutorConfig_Free); 464 465 TFTPU_ADD_FN_IN_STRUCT(TpuDeviceDescription_New); 466 TFTPU_ADD_FN_IN_STRUCT(TpuDeviceDescription_Free); 467 468 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_CreateDeviceDescription); 469 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_NewDeviceOptions); 470 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_FreeDeviceOptions); 471 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_HostCallback); 472 473 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_New); 474 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_Free); 475 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_PlatformId); 476 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_HostShapeToDeviceShape); 477 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_TransferLiteralToDeviceAsync); 478 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_TransferLiteralFromDevice); 479 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_GetByteSizeRequirement); 480 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_ChooseCompactLayoutForShape); 481 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_CanShapedBufferBeAccessedNow); 482 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_CanBufferBeAccessedNow); 483 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_WriteSingleTupleIndexTable); 484 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_GetInfeedLayout); 485 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_LinearizeToBuffers); 486 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_FreeBuffers); 487 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_TransferLiteralToInfeed); 488 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_TransferBuffersToInfeed); 489 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_TransferLiteralFromOutfeed); 490 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_ResetDevices); 491 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_ReadDynamicShapes); 492 493 TFTPU_ADD_FN_IN_STRUCT(TpuComputationPlacer_New); 494 TFTPU_ADD_FN_IN_STRUCT(TpuComputationPlacer_Free); 495 TFTPU_ADD_FN_IN_STRUCT(TpuComputationPlacer_AssignDevices); 496 TFTPU_ADD_FN_IN_STRUCT(TpuComputationPlacer_AssignLocalDevices); 497 498 TFTPU_ADD_FN_IN_STRUCT(TpuTopology_LogicalDevicesPerHost); 499 TFTPU_ADD_FN_IN_STRUCT(TpuTopology_LogicalDevicesPerChip); 500 TFTPU_ADD_FN_IN_STRUCT(TpuTopology_HostCount); 501 TFTPU_ADD_FN_IN_STRUCT(TpuTopology_ChipsPerHost); 502 503 TFTPU_ADD_FN_IN_STRUCT(TpuTopology_ChipBounds_X); 504 TFTPU_ADD_FN_IN_STRUCT(TpuTopology_ChipBounds_Y); 505 TFTPU_ADD_FN_IN_STRUCT(TpuTopology_ChipBounds_Z); 506 TFTPU_ADD_FN_IN_STRUCT(TpuTopology_HasChip); 507 TFTPU_ADD_FN_IN_STRUCT(TpuTopology_CoreForId); 508 TFTPU_ADD_FN_IN_STRUCT(TpuTopology_Core); 509 TFTPU_ADD_FN_IN_STRUCT(TpuTopology_NumCores); 510 TFTPU_ADD_FN_IN_STRUCT(TpuTopology_Cores); 511 TFTPU_ADD_FN_IN_STRUCT(TpuTopology_IdForHost); 512 TFTPU_ADD_FN_IN_STRUCT(TpuTopology_Version); 513 514 TFTPU_ADD_FN_IN_STRUCT(TpuCoreLocation_ChipCoordinates); 515 TFTPU_ADD_FN_IN_STRUCT(TpuCoreLocation_HostCoordinates); 516 TFTPU_ADD_FN_IN_STRUCT(TpuCoreLocation_Index); 517 TFTPU_ADD_FN_IN_STRUCT(TpuCoreLocation_Id); 518 519 TFTPU_ADD_FN_IN_STRUCT(TpuHostLocation_Id); 520 TFTPU_ADD_FN_IN_STRUCT(TpuHostLocation_NumCores); 521 TFTPU_ADD_FN_IN_STRUCT(TpuHostLocation_Cores); 522 523 TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_New); 524 TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_Free); 525 TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_RunHloPasses); 526 TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_RunBackend); 527 TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_Compile); 528 TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_ShapeSize); 529 TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_ExecuteAsyncOnStream); 530 TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_FreeXlaShapeIndexArray); 531 TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_FreeMaybeOwningDeviceMemoryArray); 532 TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_Fingerprint); 533 TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_Serialize); 534 TFTPU_ADD_FN_IN_STRUCT(TpuExecutableSerialize_GetByteSize); 535 TFTPU_ADD_FN_IN_STRUCT(TpuExecutableSerialize_WriteToArray); 536 TFTPU_ADD_FN_IN_STRUCT(TpuExecutableSerialize_FreeHandle); 537 TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_Deserialize); 538 TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_HloModule); 539 TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_Free); 540 541 TFTPU_ADD_FN_IN_STRUCT(XlaShapeToTpuShapeRepresentation); 542 TFTPU_ADD_FN_IN_STRUCT(XlaShapeToTpuPaddedShape); 543 }; 544 } 545 546 // extern "C" 547 548 #endif // TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_EXECUTOR_C_API_H_ 549