1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_EXECUTOR_C_API_H_ 17 #define TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_EXECUTOR_C_API_H_ 18 19 #include <stddef.h> 20 #include <stdint.h> 21 22 #include "tensorflow/c/tf_attrtype.h" 23 #include "tensorflow/core/tpu/libtftpu.h" 24 #include "tensorflow/stream_executor/tpu/c_api_decl.h" 25 26 extern "C" { 27 28 SE_Platform* TpuPlatform_New(); 29 void TpuPlatform_Free(SE_Platform* platform); 30 void TpuPlatform_Initialize(SE_Platform* platform, size_t options_size, 31 const char** options_key, 32 const char** options_value, TF_Status* status); 33 bool TpuPlatform_Initialized(SE_Platform* platform); 34 SE_StreamExecutor* TpuPlatform_GetExecutor(SE_Platform* platform, 35 SE_StreamExecutorConfig* config, 36 TF_Status* status); 37 SE_PlatformId TpuPlatform_Id(SE_Platform* platform); 38 int64_t TpuPlatform_VisibleDeviceCount(SE_Platform* platform); 39 int64_t TpuPlatform_TpuMemoryLimit(SE_Platform* platform); 40 bool TpuPlatform_ShouldRegisterTpuDeviceToDeviceCopy(SE_Platform* platform); 41 SE_TpuTopology* TpuPlatform_GetTopologyPtr(SE_Platform* platform); 42 SE_TpuTopology_Host* TpuPlatform_GetHostLocation(SE_Platform* platform); 43 TpuRuntimeVersion TpuPlatform_GetRuntimeVersion(SE_Platform* platform); 44 45 void TpuExecutor_Init(SE_StreamExecutor* executor, int device_ordinal, 46 SE_DeviceOptions* device_options, TF_Status* status); 47 void TpuExecutor_Free(SE_StreamExecutor* executor); 48 49 int TpuExecutor_PlatformDeviceCount(SE_StreamExecutor* executor); 50 51 SE_DeviceMemoryBase TpuExecutor_Allocate(SE_StreamExecutor* executor, 52 uint64_t size, int64_t memory_space); 53 void TpuExecutor_Deallocate(SE_StreamExecutor* executor, 54 SE_DeviceMemoryBase* memory); 55 bool TpuExecutor_GetAllocatorStats(SE_StreamExecutor* executor, 56 SE_AllocatorStats* stats); 57 bool TpuExecutor_DeviceMemoryUsage(SE_StreamExecutor* executor, int64_t* free, 58 int64_t* total); 59 60 bool TpuExecutor_AllocateStream(SE_StreamExecutor* executor, SE_Stream* stream); 61 void TpuExecutor_DeallocateStream(SE_StreamExecutor* executor, 62 SE_Stream* stream); 63 bool TpuExecutor_CreateStreamDependency(SE_StreamExecutor* executor, 64 SE_Stream* dependent, SE_Stream* other); 65 void TpuExecutor_GetStatus(SE_StreamExecutor* executor, SE_Stream* stream, 66 TF_Status* status); 67 68 SE_TpuTopology_Core* TpuExecutor_GetCoreLocation(SE_StreamExecutor* executor); 69 70 void TpuExecutor_AllocateEvent(SE_StreamExecutor* executor, SE_Event* event, 71 TF_Status* status); 72 void TpuExecutor_DeallocateEvent(SE_StreamExecutor* executor, SE_Event* event, 73 TF_Status* status); 74 int TpuExecutor_PollForEventStatus(SE_StreamExecutor* executor, 75 SE_Event* event); 76 void TpuExecutor_RecordEvent(SE_StreamExecutor* executor, SE_Stream* stream, 77 SE_Event* event, TF_Status* status); 78 void TpuExecutor_WaitForEvent(SE_StreamExecutor* executor, SE_Stream* stream, 79 SE_Event* event, TF_Status* status); 80 81 bool TpuExecutor_AllocateTimer(SE_StreamExecutor* executor, SE_Timer* timer); 82 void TpuExecutor_DeallocateTimer(SE_StreamExecutor* executor, SE_Timer* timer); 83 bool TpuExecutor_StartTimer(SE_StreamExecutor* executor, SE_Stream* stream, 84 SE_Timer* timer); 85 bool TpuExecutor_StopTimer(SE_StreamExecutor* executor, SE_Stream* stream, 86 SE_Timer* timer); 87 88 void TpuExecutor_SynchronousMemcpyToHost(SE_StreamExecutor* executor, 89 void* host_dst, 90 const SE_DeviceMemoryBase* device_src, 91 uint64_t size, TF_Status* status); 92 void TpuExecutor_SynchronousMemcpyFromHost(SE_StreamExecutor* executor, 93 SE_DeviceMemoryBase* device_dst, 94 const void* host_src, uint64_t size, 95 TF_Status* status); 96 bool TpuExecutor_MemcpyToHost(SE_StreamExecutor* executor, SE_Stream* stream, 97 void* host_dst, 98 const SE_DeviceMemoryBase* device_src, 99 uint64_t size); 100 101 bool TpuExecutor_MemcpyFromHost(SE_StreamExecutor* executor, SE_Stream* stream, 102 SE_DeviceMemoryBase* device_dst, 103 const void* host_src, uint64_t size); 104 105 void TpuExecutor_EnqueueInfeed(SE_StreamExecutor* executor, 106 int32_t infeed_queue_index, const uint8_t* data, 107 int64_t size, TF_Status* status); 108 void TpuExecutor_DequeueOutfeed(SE_StreamExecutor* executor, 109 int32_t outfeed_queue_index, uint8_t* data, 110 int64_t size, TF_Status* status); 111 void TpuExecutor_WaitForInfeedReady(SE_StreamExecutor* executor, 112 int32_t infeed_queue_index, 113 TF_Status* status); 114 void TpuExecutor_WaitForOutfeedReady(SE_StreamExecutor* executor, 115 int32_t outfeed_queue_index, 116 TF_Status* status); 117 118 void TpuExecutor_BlockHostUntilDone(SE_StreamExecutor* executor, 119 SE_Stream* stream, TF_Status* status); 120 void TpuExecutor_BlockUntilDoneOrFailed(SE_StreamExecutor* executor, 121 TF_Status* status); 122 void TpuExecutor_SyncAndForgetFailedStreams(SE_StreamExecutor* executor); 123 bool TpuExecutor_SynchronizeAllActivity(SE_StreamExecutor* executor); 124 125 SE_Stream* TpuStream_New(SE_StreamExecutor* parent); 126 void TpuStream_Free(SE_Stream*); 127 void* TpuStream_Stream(SE_Stream*); 128 bool TpuStream_Status(SE_Stream*); 129 bool TpuStream_IsSameSharedMemoryLocation(SE_Stream*, SE_Stream*); 130 void TpuStream_EnqueueTransferHostToDevice(SE_Stream* stream, 131 SE_DeviceMemoryBase device_dst, 132 void* host_src, uint64_t size, 133 TF_Status* status); 134 void TpuStream_EnqueueTransferDeviceToHost(SE_Stream* stream, 135 SE_DeviceMemoryBase device_src, 136 void* host_dst, uint64_t size, 137 TF_Status* status); 138 void TpuStream_TpuEnqueueOnDeviceSendRecvLocal(SE_Stream* stream, 139 SE_DeviceMemoryBase send_buffer, 140 SE_DeviceMemoryBase recv_buffer, 141 TF_Status* status); 142 143 SE_Event* TpuEvent_New(SE_StreamExecutor* parent); 144 void TpuEvent_Free(SE_Event*); 145 146 SE_Timer* TpuTimer_New(SE_StreamExecutor* parent); 147 void TpuTimer_Free(SE_Timer*); 148 int64_t TpuTimer_Nanoseconds(SE_Timer*); 149 int64_t TpuTimer_Microseconds(SE_Timer*); 150 151 TF_Status* TpuStatus_New(); 152 TF_Status* TpuStatus_Create(int32_t code, const char* msg); 153 void TpuStatus_Set(TF_Status* status, int32_t code, const char* msg, 154 int32_t len); 155 void TpuStatus_Free(TF_Status* status); 156 const char* TpuStatus_Message(TF_Status* status); 157 int TpuStatus_Code(TF_Status* status); 158 bool TpuStatus_Ok(TF_Status* status); 159 160 SE_StreamExecutorConfig* TpuStreamExecutorConfig_Default(); 161 void TpuStreamExecutorConfig_SetOrdinal(SE_StreamExecutorConfig*, int ordinal); 162 void TpuStreamExecutorConfig_Free(SE_StreamExecutorConfig*); 163 164 SE_DeviceDescription* TpuDeviceDescription_New(); 165 void TpuDeviceDescription_Free(SE_DeviceDescription* description); 166 void TpuExecutor_CreateDeviceDescription(SE_StreamExecutor* executor, 167 SE_DeviceDescription* description, 168 TF_Status* status); 169 170 SE_DeviceOptions* TpuExecutor_NewDeviceOptions(unsigned flags); 171 void TpuExecutor_FreeDeviceOptions(SE_DeviceOptions* options); 172 173 bool TpuExecutor_HostCallback(SE_StreamExecutor* executor, SE_Stream* stream, 174 SE_StatusCallbackFn callback_fn, void* ctx); 175 176 XLA_TransferManager* TpuTransferManager_New(); 177 void TpuTransferManager_Free(XLA_TransferManager* manager); 178 SE_PlatformId TpuTransferManager_PlatformId(XLA_TransferManager* manager); 179 void TpuTransferManager_HostShapeToDeviceShape(XLA_TransferManager* manager, 180 XLA_Shape* host_shape, 181 XLA_Shape* device_shape); 182 void TpuTransferManager_TransferLiteralToDeviceAsync( 183 XLA_TransferManager* manager, SE_Stream* stream, XLA_Literal* literal, 184 XLA_ShapedBuffer* device_buffer, TF_Status* status); 185 void TpuTransferManager_TransferLiteralFromDevice( 186 XLA_TransferManager* manager, SE_Stream* stream, 187 XLA_ShapedBuffer* device_buffer, XLA_Literal* literal, 188 XLA_StatusCallbackFn callback, void* ctx); 189 int64_t TpuTransferManager_GetByteSizeRequirement(XLA_TransferManager* manager, 190 XLA_Shape* shape); 191 void TpuTransferManager_ChooseCompactLayoutForShape( 192 XLA_TransferManager* manager, XLA_Shape* host_shape, XLA_Shape* output, 193 TF_Status* status); 194 bool TpuTransferManager_CanShapedBufferBeAccessedNow( 195 XLA_TransferManager* manager, SE_StreamExecutor* executor, 196 XLA_ShapedBuffer* device_buffer); 197 bool TpuTransferManager_CanBufferBeAccessedNow( 198 XLA_TransferManager* manager, SE_StreamExecutor* executor, 199 SE_DeviceMemoryBase* device_buffer); 200 void TpuTransferManager_WriteSingleTupleIndexTable( 201 XLA_TransferManager* manager, SE_Stream* stream, 202 SE_DeviceMemoryBase* elements, size_t elements_len, XLA_Shape* shape, 203 SE_DeviceMemoryBase* region, TF_Status* status); 204 void TpuTransferManager_GetInfeedLayout(XLA_Shape* shape, 205 XLA_Shape* infeed_shape); 206 void TpuTransferManager_LinearizeToBuffers( 207 XLA_TransferManager* manager, XLA_Literal* c_literal, char*** buffers_array, 208 int64_t** buffers_size, int64_t* buffers_array_size, TF_Status* status); 209 void TpuTransferManager_FreeBuffers(char** buffers_array, int64_t* buffers_size, 210 int64_t buffers_array_size); 211 void TpuTransferManager_TransferLiteralToInfeed(XLA_TransferManager* manager, 212 SE_StreamExecutor* executor, 213 XLA_Literal* c_literal, 214 TF_Status* status); 215 void TpuTransferManager_TransferBuffersToInfeed(XLA_TransferManager* manager, 216 SE_StreamExecutor* executor, 217 uint32_t** buffers_array, 218 int64_t* buffers_size_in_uint32, 219 int64_t buffers_array_size, 220 TF_Status* status); 221 void TpuTransferManager_TransferLiteralFromOutfeed( 222 XLA_TransferManager* manager, SE_StreamExecutor* executor, 223 XLA_Shape* shape /*deprecated*/, XLA_Literal* c_literal, TF_Status* status); 224 void TpuTransferManager_ResetDevices(XLA_TransferManager* manager, 225 SE_StreamExecutor** executors, 226 int64_t num_executors, TF_Status* status); 227 228 XLA_ComputationPlacer* TpuComputationPlacer_New(); 229 void TpuComputationPlacer_Free(XLA_ComputationPlacer* placer); 230 // `assignment` should be a preallocated array of size `replicate_count` * 231 // `computation_count`. The assignment will be constructed as a 2D array where 232 // assignment[replica][computation] = device_id. 233 void TpuComputationPlacer_AssignDevices(XLA_ComputationPlacer* placer, 234 int replica_count, 235 int computation_count, int* assignment, 236 TF_Status* status); 237 void TpuComputationPlacer_AssignLocalDevices(SE_TpuTopology_Host* host, 238 int replica_count, 239 int computation_count, 240 int* assignment, 241 TF_Status* status); 242 243 int TpuTopology_LogicalDevicesPerHost(SE_TpuTopology* tpu_topology, 244 TpuCoreTypeEnum tpu_core_type); 245 int TpuTopology_LogicalDevicesPerChip(SE_TpuTopology* tpu_topology, 246 TpuCoreTypeEnum tpu_core_type); 247 int TpuTopology_HostCount(SE_TpuTopology* tpu_topology); 248 int TpuTopology_ChipsPerHost(SE_TpuTopology* tpu_topology); 249 250 int TpuTopology_ChipBounds_X(SE_TpuTopology* tpu_topology); 251 int TpuTopology_ChipBounds_Y(SE_TpuTopology* tpu_topology); 252 int TpuTopology_ChipBounds_Z(SE_TpuTopology* tpu_topology); 253 bool TpuTopology_HasChip(SE_TpuTopology* tpu_topology, int x, int y, int z); 254 SE_TpuTopology_Core* TpuTopology_CoreForId(SE_TpuTopology* tpu_topology, 255 TpuCoreTypeEnum tpu_core_type, 256 int id); 257 SE_TpuTopology_Core* TpuTopology_Core(SE_TpuTopology* tpu_topology, 258 TpuCoreTypeEnum tpu_core_type, int x, 259 int y, int z, int index); 260 int TpuTopology_NumCores(SE_TpuTopology* tpu_topology, 261 TpuCoreTypeEnum tpu_core_type); 262 // 'cores' should be a preallocated array of size TpuTopology_NumCores. 263 void TpuTopology_Cores(SE_TpuTopology* tpu_topology, 264 TpuCoreTypeEnum tpu_core_type, 265 SE_TpuTopology_Core** cores); 266 int TpuTopology_IdForHost(SE_TpuTopology* tpu_topology, int x, int y, int z); 267 TpuVersionEnum TpuTopology_Version(SE_TpuTopology* tpu_topology); 268 void TpuCoreLocation_ChipCoordinates(SE_TpuTopology_Core* tpu_core_location, 269 int* x, int* y, int* z); 270 void TpuCoreLocation_HostCoordinates(SE_TpuTopology_Core* tpu_core_location, 271 int* x, int* y, int* z); 272 int TpuCoreLocation_Index(SE_TpuTopology_Core* tpu_core_location); 273 int TpuCoreLocation_Id(SE_TpuTopology_Core* tpu_core_location); 274 275 int TpuHostLocation_Id(SE_TpuTopology_Host* tpu_host_location); 276 int TpuHostLocation_NumCores(SE_TpuTopology_Host* tpu_host_location, 277 TpuCoreTypeEnum tpu_core_type); 278 // 'cores' should be a preallocated array of size TpuHostLocation_NumCores. 279 void TpuHostLocation_Cores(SE_TpuTopology_Host* tpu_host_location, 280 TpuCoreTypeEnum tpu_core_type, 281 SE_TpuTopology_Core** cores); 282 283 // C API for XLA::Compiler interface 284 285 TFTPU_CAPI_EXPORT Tpu_Compiler* TpuCompiler_New(); 286 TFTPU_CAPI_EXPORT void TpuCompiler_Free(Tpu_Compiler* compiler); 287 288 TFTPU_CAPI_EXPORT void TpuCompiler_RunHloPasses( 289 Tpu_Compiler* compiler, XLA_HloModule* se_hlo_module, 290 SE_StreamExecutor* stream_executor, SE_DeviceMemoryAllocator* allocator, 291 XLA_HloModule* result, TF_Status* status); 292 293 TFTPU_CAPI_EXPORT void TpuCompiler_RunBackend( 294 Tpu_Compiler* compiler, XLA_HloModule* se_hlo_module, 295 SE_StreamExecutor* stream_executor, SE_DeviceMemoryAllocator* allocator, 296 SE_Executable** result, TF_Status* status); 297 298 TFTPU_CAPI_EXPORT void TpuCompiler_Compile( 299 Tpu_Compiler* compiler, XLA_HloModuleGroup* se_hlo_module_group, 300 SE_StreamExecutorList* stream_exec_lists, int num_lists, 301 SE_DeviceMemoryAllocator* allocator, SE_Executable** executables, 302 TF_Status* status); 303 304 TFTPU_CAPI_EXPORT int64_t TpuCompiler_ShapeSize(Tpu_Compiler* compiler, 305 XLA_Shape* c_shape); 306 307 TFTPU_CAPI_EXPORT void TpuExecutable_ExecuteAsyncOnStream( 308 SE_Executable* executable, SE_ExecutableRunOptions* se_options, 309 SE_ExecutionInput** se_arguments, int se_arguments_size, 310 SE_HloExecutionProfile* hlo_execution_profile, 311 SE_ExecutionOutput* se_output, TF_Status* status); 312 313 // This frees the XLA_ShapeIndex* array allocated when se_output is returned by 314 // TpuExecutable_ExecuteAsyncOnStream. 315 TFTPU_CAPI_EXPORT void TpuExecutable_FreeXlaShapeIndexArray( 316 XLA_ShapeIndex* array); 317 318 // This frees the SE_MaybeOwningDeviceMemory* array allocated when se_output is 319 // returned by TpuExecutable_ExecuteAsyncOnStream. 320 // Note that this only frees the heap-allocated array itself, and does not 321 // free any of the underlying device memory. 322 TFTPU_CAPI_EXPORT void TpuExecutable_FreeMaybeOwningDeviceMemoryArray( 323 SE_MaybeOwningDeviceMemory* array); 324 325 TFTPU_CAPI_EXPORT void TpuExecutable_Fingerprint(SE_Executable* executable, 326 const char** fingerprint, 327 size_t* size); 328 329 // Caller is responsible for freeing the returned module's proto and its 330 // config's proto. 331 TFTPU_CAPI_EXPORT XLA_HloModule 332 TpuExecutable_HloModule(SE_Executable* executable); 333 334 TFTPU_CAPI_EXPORT void TpuExecutable_Free(SE_Executable*); 335 336 // Converts an XLA `Shape` into its equivalent TPU `Shape` representation. 337 TFTPU_CAPI_EXPORT void XlaShapeToTpuShapeRepresentation( 338 XLA_Shape* serialized_xla_shape, int data_type, bool use_fast_memory, 339 XLA_Shape* serialized_tpu_shape, TF_Status* status); 340 341 TFTPU_CAPI_EXPORT void XlaShapeToTpuPaddedShape(XLA_Shape* serialized_xla_shape, 342 XLA_Shape* padded_shape, 343 TF_Status* status); 344 345 struct TfTpu_ExecutorApiFn { 346 TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_New); 347 TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_Free); 348 TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_Initialize); 349 TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_Initialized); 350 TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_GetExecutor); 351 TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_Id); 352 TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_VisibleDeviceCount); 353 TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_TpuMemoryLimit); 354 TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_ShouldRegisterTpuDeviceToDeviceCopy); 355 TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_GetTopologyPtr); 356 TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_GetHostLocation); 357 TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_GetRuntimeVersion); 358 359 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_Init); 360 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_Free); 361 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_PlatformDeviceCount); 362 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_Allocate); 363 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_Deallocate); 364 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_GetAllocatorStats); 365 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_DeviceMemoryUsage); 366 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_AllocateStream); 367 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_DeallocateStream); 368 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_CreateStreamDependency); 369 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_GetStatus); 370 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_GetCoreLocation); 371 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_AllocateEvent); 372 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_DeallocateEvent); 373 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_PollForEventStatus); 374 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_RecordEvent); 375 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_WaitForEvent); 376 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_AllocateTimer); 377 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_DeallocateTimer); 378 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_StartTimer); 379 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_StopTimer); 380 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_SynchronousMemcpyToHost); 381 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_SynchronousMemcpyFromHost); 382 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_MemcpyToHost); 383 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_MemcpyFromHost); 384 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_EnqueueInfeed); 385 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_DequeueOutfeed); 386 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_WaitForInfeedReady); 387 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_WaitForOutfeedReady); 388 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_BlockHostUntilDone); 389 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_BlockUntilDoneOrFailed); 390 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_SyncAndForgetFailedStreams); 391 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_SynchronizeAllActivity); 392 393 TFTPU_ADD_FN_IN_STRUCT(TpuStream_New); 394 TFTPU_ADD_FN_IN_STRUCT(TpuStream_Free); 395 TFTPU_ADD_FN_IN_STRUCT(TpuStream_Stream); 396 TFTPU_ADD_FN_IN_STRUCT(TpuStream_Status); 397 TFTPU_ADD_FN_IN_STRUCT(TpuStream_IsSameSharedMemoryLocation); 398 TFTPU_ADD_FN_IN_STRUCT(TpuStream_EnqueueTransferHostToDevice); 399 TFTPU_ADD_FN_IN_STRUCT(TpuStream_EnqueueTransferDeviceToHost); 400 TFTPU_ADD_FN_IN_STRUCT(TpuStream_TpuEnqueueOnDeviceSendRecvLocal); 401 402 TFTPU_ADD_FN_IN_STRUCT(TpuEvent_New); 403 TFTPU_ADD_FN_IN_STRUCT(TpuEvent_Free); 404 405 TFTPU_ADD_FN_IN_STRUCT(TpuTimer_New); 406 TFTPU_ADD_FN_IN_STRUCT(TpuTimer_Free); 407 TFTPU_ADD_FN_IN_STRUCT(TpuTimer_Nanoseconds); 408 TFTPU_ADD_FN_IN_STRUCT(TpuTimer_Microseconds); 409 410 TFTPU_ADD_FN_IN_STRUCT(TpuStatus_New); 411 TFTPU_ADD_FN_IN_STRUCT(TpuStatus_Create); 412 TFTPU_ADD_FN_IN_STRUCT(TpuStatus_Set); 413 TFTPU_ADD_FN_IN_STRUCT(TpuStatus_Free); 414 TFTPU_ADD_FN_IN_STRUCT(TpuStatus_Message); 415 TFTPU_ADD_FN_IN_STRUCT(TpuStatus_Code); 416 TFTPU_ADD_FN_IN_STRUCT(TpuStatus_Ok); 417 418 TFTPU_ADD_FN_IN_STRUCT(TpuStreamExecutorConfig_Default); 419 TFTPU_ADD_FN_IN_STRUCT(TpuStreamExecutorConfig_SetOrdinal); 420 TFTPU_ADD_FN_IN_STRUCT(TpuStreamExecutorConfig_Free); 421 422 TFTPU_ADD_FN_IN_STRUCT(TpuDeviceDescription_New); 423 TFTPU_ADD_FN_IN_STRUCT(TpuDeviceDescription_Free); 424 425 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_CreateDeviceDescription); 426 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_NewDeviceOptions); 427 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_FreeDeviceOptions); 428 TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_HostCallback); 429 430 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_New); 431 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_Free); 432 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_PlatformId); 433 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_HostShapeToDeviceShape); 434 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_TransferLiteralToDeviceAsync); 435 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_TransferLiteralFromDevice); 436 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_GetByteSizeRequirement); 437 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_ChooseCompactLayoutForShape); 438 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_CanShapedBufferBeAccessedNow); 439 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_CanBufferBeAccessedNow); 440 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_WriteSingleTupleIndexTable); 441 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_GetInfeedLayout); 442 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_LinearizeToBuffers); 443 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_FreeBuffers); 444 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_TransferLiteralToInfeed); 445 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_TransferBuffersToInfeed); 446 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_TransferLiteralFromOutfeed); 447 TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_ResetDevices); 448 449 TFTPU_ADD_FN_IN_STRUCT(TpuComputationPlacer_New); 450 TFTPU_ADD_FN_IN_STRUCT(TpuComputationPlacer_Free); 451 TFTPU_ADD_FN_IN_STRUCT(TpuComputationPlacer_AssignDevices); 452 TFTPU_ADD_FN_IN_STRUCT(TpuComputationPlacer_AssignLocalDevices); 453 454 TFTPU_ADD_FN_IN_STRUCT(TpuTopology_LogicalDevicesPerHost); 455 TFTPU_ADD_FN_IN_STRUCT(TpuTopology_LogicalDevicesPerChip); 456 TFTPU_ADD_FN_IN_STRUCT(TpuTopology_HostCount); 457 TFTPU_ADD_FN_IN_STRUCT(TpuTopology_ChipsPerHost); 458 459 TFTPU_ADD_FN_IN_STRUCT(TpuTopology_ChipBounds_X); 460 TFTPU_ADD_FN_IN_STRUCT(TpuTopology_ChipBounds_Y); 461 TFTPU_ADD_FN_IN_STRUCT(TpuTopology_ChipBounds_Z); 462 TFTPU_ADD_FN_IN_STRUCT(TpuTopology_HasChip); 463 TFTPU_ADD_FN_IN_STRUCT(TpuTopology_CoreForId); 464 TFTPU_ADD_FN_IN_STRUCT(TpuTopology_Core); 465 TFTPU_ADD_FN_IN_STRUCT(TpuTopology_NumCores); 466 TFTPU_ADD_FN_IN_STRUCT(TpuTopology_Cores); 467 TFTPU_ADD_FN_IN_STRUCT(TpuTopology_IdForHost); 468 TFTPU_ADD_FN_IN_STRUCT(TpuTopology_Version); 469 470 TFTPU_ADD_FN_IN_STRUCT(TpuCoreLocation_ChipCoordinates); 471 TFTPU_ADD_FN_IN_STRUCT(TpuCoreLocation_HostCoordinates); 472 TFTPU_ADD_FN_IN_STRUCT(TpuCoreLocation_Index); 473 TFTPU_ADD_FN_IN_STRUCT(TpuCoreLocation_Id); 474 475 TFTPU_ADD_FN_IN_STRUCT(TpuHostLocation_Id); 476 TFTPU_ADD_FN_IN_STRUCT(TpuHostLocation_NumCores); 477 TFTPU_ADD_FN_IN_STRUCT(TpuHostLocation_Cores); 478 479 TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_New); 480 TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_Free); 481 TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_RunHloPasses); 482 TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_RunBackend); 483 TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_Compile); 484 TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_ShapeSize); 485 TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_ExecuteAsyncOnStream); 486 TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_FreeXlaShapeIndexArray); 487 TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_FreeMaybeOwningDeviceMemoryArray); 488 TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_Fingerprint); 489 TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_HloModule); 490 TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_Free); 491 492 TFTPU_ADD_FN_IN_STRUCT(XlaShapeToTpuShapeRepresentation); 493 TFTPU_ADD_FN_IN_STRUCT(XlaShapeToTpuPaddedShape); 494 }; 495 } 496 497 // extern "C" 498 499 #endif // TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_EXECUTOR_C_API_H_ 500