/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_C_EAGER_C_API_EXPERIMENTAL_H_
#define TENSORFLOW_C_EAGER_C_API_EXPERIMENTAL_H_

#include "tensorflow/c/c_api.h"
#include "tensorflow/c/eager/c_api.h"

#ifdef __cplusplus
extern "C" {
#endif

// Resets `op_to_reset` with `op_or_function_name` and `raw_device_name`. This
// is a performance optimization: it reuses an existing unused op rather than
// creating a new op every time. If `raw_device_name` is `NULL` or empty, it
// does not set the device name. If it's not `NULL`, then it attempts to parse
// and set the device name. It's effectively `TFE_OpSetDevice`, but it is faster
// than calling that separately because if the existing op already has the same
// `raw_device_name`, it skips parsing and leaves the device as it is.
TF_CAPI_EXPORT extern void TFE_OpReset(TFE_Op* op_to_reset,
                                       const char* op_or_function_name,
                                       const char* raw_device_name,
                                       TF_Status* status);

// Enables only graph collection in RunMetadata on the functions executed from
// this context.
TF_CAPI_EXPORT extern void TFE_ContextEnableGraphCollection(TFE_Context* ctx);

// Disables only graph collection in RunMetadata on the functions executed from
// this context.
TF_CAPI_EXPORT extern void TFE_ContextDisableGraphCollection(TFE_Context* ctx);

// TODO(fishx): Move these monitoring APIs into a separate file.
// -----------------------------------------------------------------------------
// Monitoring Counter APIs.
// These APIs are de-templated versions of the monitoring Counter, for swig.

// Only forward-declared in this header; used exclusively through pointers.
typedef struct TFE_MonitoringCounterCell TFE_MonitoringCounterCell;

// Atomically increments the value of the cell. The value must be non-negative.
TF_CAPI_EXPORT extern void TFE_MonitoringCounterCellIncrementBy(
    TFE_MonitoringCounterCell* cell, int64_t value);

// Retrieves the current value of the cell.
TF_CAPI_EXPORT extern int64_t TFE_MonitoringCounterCellValue(
    TFE_MonitoringCounterCell* cell);

// APIs for Counter without label.
typedef struct TFE_MonitoringCounter0 TFE_MonitoringCounter0;
// Returns a new Counter metric object. The caller should manage lifetime of
// the object. Using a duplicate metric name will crash the program with a
// fatal error.
TF_CAPI_EXPORT extern TFE_MonitoringCounter0* TFE_MonitoringNewCounter0(
    const char* name, TF_Status* status, const char* description);
// Deletes the Counter object.
TF_CAPI_EXPORT extern void TFE_MonitoringDeleteCounter0(
    TFE_MonitoringCounter0* counter);
// Retrieves the cell from the Counter object. The Counter object will manage
// lifetime of the cell.
TF_CAPI_EXPORT extern TFE_MonitoringCounterCell* TFE_MonitoringGetCellCounter0(
    TFE_MonitoringCounter0* counter);

// APIs for Counter with 1 label.
// New/Delete/GetCell take the same arguments as the label-less Counter APIs
// plus one `label1` string.
// NOTE(review): presumably the same lifetime and duplicate-name rules as the
// label-less Counter apply -- confirm against the implementation.
typedef struct TFE_MonitoringCounter1 TFE_MonitoringCounter1;
TF_CAPI_EXPORT extern TFE_MonitoringCounter1* TFE_MonitoringNewCounter1(
    const char* name, TF_Status* status, const char* description,
    const char* label1);
TF_CAPI_EXPORT extern void TFE_MonitoringDeleteCounter1(
    TFE_MonitoringCounter1* counter);
TF_CAPI_EXPORT extern TFE_MonitoringCounterCell* TFE_MonitoringGetCellCounter1(
    TFE_MonitoringCounter1* counter, const char* label1);

// APIs for Counter with 2 labels.
typedef struct TFE_MonitoringCounter2 TFE_MonitoringCounter2;
TF_CAPI_EXPORT extern TFE_MonitoringCounter2* TFE_MonitoringNewCounter2(
    const char* name, TF_Status* status, const char* description,
    const char* label1, const char* label2);
TF_CAPI_EXPORT extern void TFE_MonitoringDeleteCounter2(
    TFE_MonitoringCounter2* counter);
TF_CAPI_EXPORT extern TFE_MonitoringCounterCell* TFE_MonitoringGetCellCounter2(
    TFE_MonitoringCounter2* counter, const char* label1, const char* label2);

// -----------------------------------------------------------------------------
// Monitoring Gauge APIs.
// These APIs are de-templated versions of the monitoring Gauge, for swig.

// Only forward-declared in this header; used exclusively through pointers.
typedef struct TFE_MonitoringIntGaugeCell TFE_MonitoringIntGaugeCell;

// Atomically sets the value of the cell.
TF_CAPI_EXPORT extern void TFE_MonitoringIntGaugeCellSet(
    TFE_MonitoringIntGaugeCell* cell, int64_t value);

// Retrieves the current value of the cell.
TF_CAPI_EXPORT extern int64_t TFE_MonitoringIntGaugeCellValue(
    TFE_MonitoringIntGaugeCell* cell);

// APIs for Int Gauge without label.
// Each Int Gauge variant below exposes the same New/Delete/GetCell triple and
// differs only in the number of label strings it takes.
typedef struct TFE_MonitoringIntGauge0 TFE_MonitoringIntGauge0;
TF_CAPI_EXPORT extern TFE_MonitoringIntGauge0* TFE_MonitoringNewIntGauge0(
    const char* name, TF_Status* out_status, const char* description);
TF_CAPI_EXPORT extern void TFE_MonitoringDeleteIntGauge0(
    TFE_MonitoringIntGauge0* gauge);
TF_CAPI_EXPORT extern TFE_MonitoringIntGaugeCell*
TFE_MonitoringGetCellIntGauge0(TFE_MonitoringIntGauge0* gauge);

// APIs for Int Gauge with 1 label.
typedef struct TFE_MonitoringIntGauge1 TFE_MonitoringIntGauge1;
TF_CAPI_EXPORT extern TFE_MonitoringIntGauge1* TFE_MonitoringNewIntGauge1(
    const char* name, TF_Status* out_status, const char* description,
    const char* label1);
TF_CAPI_EXPORT extern void TFE_MonitoringDeleteIntGauge1(
    TFE_MonitoringIntGauge1* gauge);
TF_CAPI_EXPORT extern TFE_MonitoringIntGaugeCell*
TFE_MonitoringGetCellIntGauge1(TFE_MonitoringIntGauge1* gauge,
                               const char* label1);

// APIs for Int Gauge with 2 labels.
typedef struct TFE_MonitoringIntGauge2 TFE_MonitoringIntGauge2;
TF_CAPI_EXPORT extern TFE_MonitoringIntGauge2* TFE_MonitoringNewIntGauge2(
    const char* name, TF_Status* out_status, const char* description,
    const char* label1, const char* label2);
TF_CAPI_EXPORT extern void TFE_MonitoringDeleteIntGauge2(
    TFE_MonitoringIntGauge2* gauge);
TF_CAPI_EXPORT extern TFE_MonitoringIntGaugeCell*
TFE_MonitoringGetCellIntGauge2(TFE_MonitoringIntGauge2* gauge,
                               const char* label1, const char* label2);

// Only forward-declared in this header; used exclusively through pointers.
typedef struct TFE_MonitoringStringGaugeCell TFE_MonitoringStringGaugeCell;
// Sets the string value of the cell.
TF_CAPI_EXPORT extern void TFE_MonitoringStringGaugeCellSet(
    TFE_MonitoringStringGaugeCell* cell, const char* value);
// Retrieves the string value and saves it in buffer.
//
// NOTE(review): the `const` qualifier on this `void` return type has no
// effect (compilers typically warn about an ignored return-type qualifier).
// It is kept here because the declaration has to keep matching the
// definition; drop it from both in the same change.
TF_CAPI_EXPORT extern const void TFE_MonitoringStringGaugeCellValue(
    TFE_MonitoringStringGaugeCell* cell, TF_Buffer* buf);

// APIs for String Gauge without label.
// Each String Gauge variant below exposes the same New/Delete/GetCell triple
// and differs only in the number of label strings it takes.
typedef struct TFE_MonitoringStringGauge0 TFE_MonitoringStringGauge0;
TF_CAPI_EXPORT extern TFE_MonitoringStringGauge0* TFE_MonitoringNewStringGauge0(
    const char* name, TF_Status* out_status, const char* description);
TF_CAPI_EXPORT extern void TFE_MonitoringDeleteStringGauge0(
    TFE_MonitoringStringGauge0* gauge);
TF_CAPI_EXPORT extern TFE_MonitoringStringGaugeCell*
TFE_MonitoringGetCellStringGauge0(TFE_MonitoringStringGauge0* gauge);

// APIs for String Gauge with 1 label.
typedef struct TFE_MonitoringStringGauge1 TFE_MonitoringStringGauge1;
TF_CAPI_EXPORT extern TFE_MonitoringStringGauge1* TFE_MonitoringNewStringGauge1(
    const char* name, TF_Status* out_status, const char* description,
    const char* label1);
TF_CAPI_EXPORT extern void TFE_MonitoringDeleteStringGauge1(
    TFE_MonitoringStringGauge1* gauge);
TF_CAPI_EXPORT extern TFE_MonitoringStringGaugeCell*
TFE_MonitoringGetCellStringGauge1(TFE_MonitoringStringGauge1* gauge,
                                  const char* label1);

// APIs for String Gauge with 2 labels.
typedef struct TFE_MonitoringStringGauge2 TFE_MonitoringStringGauge2;
TF_CAPI_EXPORT extern TFE_MonitoringStringGauge2* TFE_MonitoringNewStringGauge2(
    const char* name, TF_Status* out_status, const char* description,
    const char* label1, const char* label2);
TF_CAPI_EXPORT extern void TFE_MonitoringDeleteStringGauge2(
    TFE_MonitoringStringGauge2* gauge);
TF_CAPI_EXPORT extern TFE_MonitoringStringGaugeCell*
TFE_MonitoringGetCellStringGauge2(TFE_MonitoringStringGauge2* gauge,
                                  const char* label1, const char* label2);

// APIs for String Gauge with 3 labels.
typedef struct TFE_MonitoringStringGauge3 TFE_MonitoringStringGauge3;
TF_CAPI_EXPORT extern TFE_MonitoringStringGauge3* TFE_MonitoringNewStringGauge3(
    const char* name, TF_Status* out_status, const char* description,
    const char* label1, const char* label2, const char* label3);
TF_CAPI_EXPORT extern void TFE_MonitoringDeleteStringGauge3(
    TFE_MonitoringStringGauge3* gauge);
TF_CAPI_EXPORT extern TFE_MonitoringStringGaugeCell*
TFE_MonitoringGetCellStringGauge3(TFE_MonitoringStringGauge3* gauge,
                                  const char* label1, const char* label2,
                                  const char* label3);

// APIs for String Gauge with 4 labels.
typedef struct TFE_MonitoringStringGauge4 TFE_MonitoringStringGauge4;
TF_CAPI_EXPORT extern TFE_MonitoringStringGauge4* TFE_MonitoringNewStringGauge4(
    const char* name, TF_Status* out_status, const char* description,
    const char* label1, const char* label2, const char* label3,
    const char* label4);
TF_CAPI_EXPORT extern void TFE_MonitoringDeleteStringGauge4(
    TFE_MonitoringStringGauge4* gauge);
TF_CAPI_EXPORT extern TFE_MonitoringStringGaugeCell*
TFE_MonitoringGetCellStringGauge4(TFE_MonitoringStringGauge4* gauge,
                                  const char* label1, const char* label2,
                                  const char* label3, const char* label4);

// Only forward-declared in this header; used exclusively through pointers.
typedef struct TFE_MonitoringBoolGaugeCell TFE_MonitoringBoolGaugeCell;
// Sets the boolean value of the cell.
TF_CAPI_EXPORT extern void TFE_MonitoringBoolGaugeCellSet(
    TFE_MonitoringBoolGaugeCell* cell, bool value);
// Returns the boolean value of the cell.
TF_CAPI_EXPORT extern bool TFE_MonitoringBoolGaugeCellValue(
    TFE_MonitoringBoolGaugeCell* cell);

// APIs for Bool Gauge without label.
// Each Bool Gauge variant below exposes the same New/Delete/GetCell triple and
// differs only in the number of label strings it takes.
typedef struct TFE_MonitoringBoolGauge0 TFE_MonitoringBoolGauge0;
TF_CAPI_EXPORT extern TFE_MonitoringBoolGauge0* TFE_MonitoringNewBoolGauge0(
    const char* name, TF_Status* out_status, const char* description);
TF_CAPI_EXPORT extern void TFE_MonitoringDeleteBoolGauge0(
    TFE_MonitoringBoolGauge0* gauge);
TF_CAPI_EXPORT extern TFE_MonitoringBoolGaugeCell*
TFE_MonitoringGetCellBoolGauge0(TFE_MonitoringBoolGauge0* gauge);

// APIs for Bool Gauge with 1 label.
typedef struct TFE_MonitoringBoolGauge1 TFE_MonitoringBoolGauge1;
TF_CAPI_EXPORT extern TFE_MonitoringBoolGauge1* TFE_MonitoringNewBoolGauge1(
    const char* name, TF_Status* out_status, const char* description,
    const char* label1);
TF_CAPI_EXPORT extern void TFE_MonitoringDeleteBoolGauge1(
    TFE_MonitoringBoolGauge1* gauge);
TF_CAPI_EXPORT extern TFE_MonitoringBoolGaugeCell*
TFE_MonitoringGetCellBoolGauge1(TFE_MonitoringBoolGauge1* gauge,
                                const char* label1);

// APIs for Bool Gauge with 2 labels.
typedef struct TFE_MonitoringBoolGauge2 TFE_MonitoringBoolGauge2;
TF_CAPI_EXPORT extern TFE_MonitoringBoolGauge2* TFE_MonitoringNewBoolGauge2(
    const char* name, TF_Status* out_status, const char* description,
    const char* label1, const char* label2);
TF_CAPI_EXPORT extern void TFE_MonitoringDeleteBoolGauge2(
    TFE_MonitoringBoolGauge2* gauge);
TF_CAPI_EXPORT extern TFE_MonitoringBoolGaugeCell*
TFE_MonitoringGetCellBoolGauge2(TFE_MonitoringBoolGauge2* gauge,
                                const char* label1, const char* label2);

// -----------------------------------------------------------------------------
// Monitoring Sampler APIs.
// These APIs are de-templated versions of the monitoring Sampler, for swig.

// Only forward-declared in this header; used exclusively through pointers.
typedef struct TFE_MonitoringSamplerCell TFE_MonitoringSamplerCell;

// Atomically add the value of the cell.
TF_CAPI_EXPORT extern void TFE_MonitoringSamplerCellAdd(
    TFE_MonitoringSamplerCell* cell, double value);

// Retrieves the current value of the cell. The return value is a HistogramProto
// saved in buffer.
TF_CAPI_EXPORT extern void TFE_MonitoringSamplerCellValue(
    TFE_MonitoringSamplerCell* cell, TF_Buffer* buf);

// APIs for sampler buckets
typedef struct TFE_MonitoringBuckets TFE_MonitoringBuckets;
// NOTE(review): parameter meanings are not documented here; judging by the
// names, presumably `bucket_count` buckets whose limits start at `scale` and
// grow by `growth_factor` -- confirm against the implementation.
TF_CAPI_EXPORT extern TFE_MonitoringBuckets*
TFE_MonitoringNewExponentialBuckets(double scale, double growth_factor,
                                    int bucket_count);
TF_CAPI_EXPORT extern void TFE_MonitoringDeleteBuckets(
    TFE_MonitoringBuckets* buckets);

// APIs for Sampler without label.
// Each Sampler variant exposes the same New/Delete/GetCell triple and differs
// only in the number of label strings it takes. Unlike the Counter and Gauge
// constructors, the Sampler constructors also take a `buckets` object.
typedef struct TFE_MonitoringSampler0 TFE_MonitoringSampler0;
TF_CAPI_EXPORT extern TFE_MonitoringSampler0* TFE_MonitoringNewSampler0(
    const char* name, TFE_MonitoringBuckets* buckets, TF_Status* out_status,
    const char* description);
TF_CAPI_EXPORT extern void TFE_MonitoringDeleteSampler0(
    TFE_MonitoringSampler0* sampler);
TF_CAPI_EXPORT extern TFE_MonitoringSamplerCell* TFE_MonitoringGetCellSampler0(
    TFE_MonitoringSampler0* sampler);

// APIs for Sampler with 1 label.
typedef struct TFE_MonitoringSampler1 TFE_MonitoringSampler1;
TF_CAPI_EXPORT extern TFE_MonitoringSampler1* TFE_MonitoringNewSampler1(
    const char* name, TFE_MonitoringBuckets* buckets, TF_Status* out_status,
    const char* description, const char* label1);
TF_CAPI_EXPORT extern void TFE_MonitoringDeleteSampler1(
    TFE_MonitoringSampler1* sampler);
TF_CAPI_EXPORT extern TFE_MonitoringSamplerCell* TFE_MonitoringGetCellSampler1(
    TFE_MonitoringSampler1* sampler, const char* label1);

// APIs for Sampler with 2 label.
typedef struct TFE_MonitoringSampler2 TFE_MonitoringSampler2;
TF_CAPI_EXPORT extern TFE_MonitoringSampler2* TFE_MonitoringNewSampler2(
    const char* name, TFE_MonitoringBuckets* buckets, TF_Status* out_status,
    const char* description, const char* label1, const char* label2);
TF_CAPI_EXPORT extern void TFE_MonitoringDeleteSampler2(
    TFE_MonitoringSampler2* sampler);
TF_CAPI_EXPORT extern TFE_MonitoringSamplerCell* TFE_MonitoringGetCellSampler2(
    TFE_MonitoringSampler2* sampler, const char* label1, const char* label2);

// Sets whether to use TFRT.
TF_CAPI_EXPORT extern void TFE_ContextOptionsSetTfrt(TFE_ContextOptions*,
                                                     bool use_tfrt);

// Sets whether to use the TFRT distributed runtime.
TF_CAPI_EXPORT extern void TFE_ContextOptionsSetTfrtDistributedRuntime(
    TFE_ContextOptions* options, bool use_tfrt_distributed_runtime);

// Returns the context_id from the EagerContext which is used by the
// EagerService to maintain consistency between client and worker. The
// context_id is initialized with a dummy value and is later set when the worker
// is initialized (either locally or remotely). The context_id can change during
// the process lifetime although this should cause the worker to be
// reinitialized (e.g. cleared caches) as well.
TF_CAPI_EXPORT extern uint64_t TFE_GetContextId(TFE_Context* ctx);

// -----------------------------------------------------------------------------
// Cancellation APIs.
311 312 typedef struct TFE_CancellationManager TFE_CancellationManager; 313 TF_CAPI_EXPORT extern TFE_CancellationManager* TFE_NewCancellationManager(); 314 TF_CAPI_EXPORT extern bool TFE_CancellationManagerIsCancelled( 315 TFE_CancellationManager*); 316 TF_CAPI_EXPORT extern void TFE_CancellationManagerStartCancel( 317 TFE_CancellationManager*); 318 TF_CAPI_EXPORT extern void TFE_DeleteCancellationManager( 319 TFE_CancellationManager*); 320 321 // Associates the given `cancellation_manager` with `op`, so that invoking 322 // `TFE_CancellationManagerStartCancel(cancellation_manager)` will cancel the 323 // execution of `op`. 324 typedef struct TFE_CancellationManager TFE_CancellationManager; 325 TF_CAPI_EXPORT extern void TFE_OpSetCancellationManager( 326 TFE_Op* op, TFE_CancellationManager* cancellation_manager, 327 TF_Status* status); 328 329 // ----------------------------------------------------------------------------- 330 // Eager Executor APIs. 331 typedef struct TFE_Executor TFE_Executor; 332 333 // Creates a new eager Executor. Nodes in one executor are guaranteed to be 334 // executed in sequence. Assigning nodes to different executors allows executing 335 // nodes in parallel. 336 TF_CAPI_EXPORT extern TFE_Executor* TFE_NewExecutor(bool is_async); 337 338 // Deletes the eager Executor without waiting for enqueued nodes. Please call 339 // TFE_ExecutorWaitForAllPendingNodes before calling this API if you want to 340 // make sure all nodes are finished. 341 TF_CAPI_EXPORT extern void TFE_DeleteExecutor(TFE_Executor*); 342 343 // Returns true if the executor is in async mode. 344 TF_CAPI_EXPORT extern bool TFE_ExecutorIsAsync(TFE_Executor*); 345 346 // Causes the calling thread to block till all ops dispatched in this executor 347 // have been executed. Note that "execution" here refers to kernel execution / 348 // scheduling of copies, etc. 
// Similar to sync execution, it doesn't guarantee
// that lower level device queues (like GPU streams) have been flushed.
//
// This call may not block for execution of ops enqueued concurrently with this
// call.
TF_CAPI_EXPORT extern void TFE_ExecutorWaitForAllPendingNodes(
    TFE_Executor*, TF_Status* status);

// When an error happens, any pending operations are discarded and newly issued
// ops return an error. This call clears the error state and re-enables
// execution of newly issued ops.
//
// Note that outputs of discarded ops remain in a corrupt state and should not
// be used for future calls.
// TODO(agarwal): mark the affected handles and raise errors if they are used.
TF_CAPI_EXPORT extern void TFE_ExecutorClearError(TFE_Executor*);

// Sets a custom Executor for the current thread. All nodes created by this
// thread will be added to this Executor. It will override the current
// executor.
TF_CAPI_EXPORT extern void TFE_ContextSetExecutorForThread(TFE_Context*,
                                                           TFE_Executor*);

// Returns the Executor for the current thread.
TF_CAPI_EXPORT extern TFE_Executor* TFE_ContextGetExecutorForThread(
    TFE_Context*);

// -----------------------------------------------------------------------------
// Dynamic cluster API.

// Update an existing context with a new set of servers defined in a ServerDef
// proto. Servers can be added to and removed from the list of remote workers
// in the context. New set of servers identified by the ServerDef must be up
// when the context is updated.
//
// NOTE(review): `proto` / `proto_len` are presumably the serialized ServerDef
// bytes and their length -- confirm against the implementation.
//
// This API is for experimental usage and may be subject to change.
TF_CAPI_EXPORT extern void TFE_ContextUpdateServerDef(TFE_Context* ctx,
                                                      int keep_alive_secs,
                                                      const void* proto,
                                                      size_t proto_len,
                                                      TF_Status* status);

// Checks whether a remote worker is alive or not.
// This will return true even if
// the context doesn't exist on the remote worker.
TF_CAPI_EXPORT extern bool TFE_ContextCheckAlive(TFE_Context* ctx,
                                                 const char* worker_name,
                                                 TF_Status* status);

// Sync pending nodes in local executors (including the context default executor
// and thread executors) and streaming requests to remote executors, and get the
// combined status.
TF_CAPI_EXPORT extern void TFE_ContextAsyncWait(TFE_Context* ctx,
                                                TF_Status* status);

// This function will block till the operation that produces `h` has
// completed. This is only valid on local TFE_TensorHandles. The pointer
// returned will be on the device in which the TFE_TensorHandle resides (so e.g.
// for a GPU tensor this will return a pointer to GPU memory). The pointer is
// only guaranteed to be valid until TFE_DeleteTensorHandle is called on this
// TensorHandle. Only supports POD data types.
//
// (`h` above refers to the unnamed TFE_TensorHandle* parameter.)
TF_CAPI_EXPORT extern void* TFE_TensorHandleDevicePointer(TFE_TensorHandle*,
                                                          TF_Status*);

// This function will block till the operation that produces `h` has
// completed. This is only valid on local TFE_TensorHandles. Returns the size in
// bytes of the memory pointed to by the device pointer returned above.
//
// (`h` above refers to the unnamed TFE_TensorHandle* parameter.)
TF_CAPI_EXPORT extern size_t TFE_TensorHandleDeviceMemorySize(TFE_TensorHandle*,
                                                              TF_Status*);

// Creates a new TensorHandle from memory residing in the physical device
// device_name. Takes ownership of the memory, and will call deleter to release
// it after TF no longer needs it or in case of error.
//
// Custom devices must use TFE_NewCustomDeviceTensorHandle instead.
TF_CAPI_EXPORT extern TFE_TensorHandle* TFE_NewTensorHandleFromDeviceMemory(
    TFE_Context* ctx, const char* device_name, TF_DataType, const int64_t* dims,
    int num_dims, void* data, size_t len,
    void (*deallocator)(void* data, size_t len, void* arg),
    void* deallocator_arg, TF_Status* status);

// Retrieves the address space (i.e. job, replica, task) of the local host and
// saves it in the buffer.
TF_CAPI_EXPORT extern void TFE_HostAddressSpace(TFE_Context* ctx,
                                                TF_Buffer* buf);

// APIs for generically dealing with op attributes (e.g. when forwarding them
// through custom device implementations).
//
// TODO(allenl): Currently these are black boxes, but we should have some way to
// inspect values. This would let people e.g. copy over most attributes and then
// modify some based on their values.

// A reference to an op's name -> attribute mapping
typedef struct TFE_OpAttrs TFE_OpAttrs;

// Fetch a reference to `op`'s attributes. The returned reference is only valid
// while `op` is alive.
TF_CAPI_EXPORT extern const TFE_OpAttrs* TFE_OpGetAttrs(const TFE_Op* op);
// Add attributes in `attrs` to `op`.
//
// Does not overwrite or update existing attributes, but adds new ones.
TF_CAPI_EXPORT extern void TFE_OpAddAttrs(TFE_Op* op, const TFE_OpAttrs* attrs);

// Serialize `attrs` as a tensorflow::NameAttrList protocol buffer (into `buf`),
// containing the op name and a map of its attributes.
TF_CAPI_EXPORT extern void TFE_OpAttrsSerialize(const TFE_OpAttrs* attrs,
                                                TF_Buffer* buf,
                                                TF_Status* status);

// Set an op's attribute from a serialized AttrValue protocol buffer.
//
// Analogous to TF_SetAttrValueProto for building graph operations.
459 TF_CAPI_EXPORT extern void TFE_OpSetAttrValueProto(const TFE_Op* op, 460 const char* attr_name, 461 const void* proto, 462 size_t proto_len, 463 TF_Status* status); 464 465 // TODO(b/166642410): It would be nice, for custom devices and for other users, 466 // to have a non-string representation of devices (TF_Device) extracted from 467 // tensors/ops/etc. and usable in APIs like OpSetDevice/ResetOp/etc. 468 469 #define TFE_CUSTOM_DEVICE_VERSION 4 470 471 // Struct to be filled in. Functions are required except where indicated. 472 typedef struct TFE_CustomDevice { 473 int version = TFE_CUSTOM_DEVICE_VERSION; 474 // Method to copy a tensor to the custom device. 475 TFE_TensorHandle* (*copy_tensor_to_device)(TFE_Context* context, 476 TFE_TensorHandle* tensor, 477 TF_Status* status, 478 void* device_info); 479 480 // Method to copy a tensor from the custom device to a target device. 481 TFE_TensorHandle* (*copy_tensor_from_device)(TFE_Context* context, 482 TFE_TensorHandle* tensor, 483 const char* target_device_name, 484 TF_Status* status, 485 void* device_info); 486 487 // Method to execute an operation. 488 // 489 // Arguments provide enough information to reconstruct the original `TFE_Op`, 490 // or construct a transformed version, by inspecting the passed `op`. 491 // 492 // TFE_OpGetDevice(op) records the original placement of the operation. It may 493 // be an empty string if no device was explicitly requested, but will 494 // otherwise be the name of this custom device. Ops are placed onto a custom 495 // device if any of their inputs are on that custom device, but custom devices 496 // are free to set a bad status in order to require explicit placement. 497 void (*execute)(const TFE_Op* op, int* num_outputs, 498 TFE_TensorHandle** outputs, TF_Status* s, void* device_info); 499 500 // Method to delete a device. 
501 void (*delete_device)(void* device_info); 502 503 // Implements TFE_CreatePackedTensorHandle when one of `handles` is on this 504 // custom device. 505 // 506 // Many devices will want to simply return an "unimplemented" status 507 // here. This is the default behavior if `pack` is null when passed to 508 // TFE_RegisterCustomDevice. 509 TFE_TensorHandle* (*pack)(TFE_Context* context, TFE_TensorHandle** handles, 510 int num_handles, TF_Status* s, 511 void* device_info) = nullptr; 512 } TFE_CustomDevice; 513 514 // Registers a custom device for use with eager execution. 515 // 516 // Eager operations may be placed on this device, e.g. `with 517 // tf.device("CUSTOM"):` from Python if `device_name` for this call is 518 // "/job:localhost/replica:0/task:0/device:CUSTOM:0". 519 // 520 // The custom device defines copy operations for moving TensorHandles on and 521 // off, and an execution operation for named operations. Often execution will 522 // simply wrap op execution on one or more physical devices. 523 // 524 // device_info is an opaque caller-defined type stored with the custom device 525 // which is passed to the functions referenced in the TFE_CustomDevice struct 526 // `device` (execute, delete_device, etc.). It can for example contain the 527 // names of wrapped devices. 528 // 529 // There are currently no graph semantics implemented for registered custom 530 // devices, so executing tf.functions which contain operations placed on custom 531 // devices will fail. 532 // 533 // `device_name` must not name an existing physical or custom device. It must 534 // follow the format: 535 // 536 // /job:<name>/replica:<replica>/task:<task>/device:<type>:<device_num> 537 // 538 // If the device is successfully registered, `status` is set to TF_OK. Otherwise 539 // the device is not usable. In case of a bad status, `device.delete_device` is 540 // still called on `device_info` (i.e. the caller does not retain ownership). 
541 // 542 // This API is highly experimental, and in particular is expected to change when 543 // it starts supporting operations with attributes and when tf.function support 544 // is added. 545 TF_CAPI_EXPORT extern void TFE_RegisterCustomDevice(TFE_Context* ctx, 546 TFE_CustomDevice device, 547 const char* device_name, 548 void* device_info, 549 TF_Status* status); 550 551 // Struct to be filled in to define a custom device tensor handle. Fields are 552 // required except where indicated. 553 typedef struct TFE_CustomDeviceTensorHandleMethods { 554 int version = TFE_CUSTOM_DEVICE_VERSION; 555 556 // Computes the rank of the tensor handle. 557 // 558 // Shapes are specified via callbacks because retrieving the shape of a tensor 559 // is a blocking operation for async eager; custom devices should avoid 560 // retrieving shapes of tensors they wrap until the custom device tensor's 561 // shape is explicitly requested where possible. 562 int (*num_dims)(void* data, TF_Status* status); 563 564 // Computes the axis length at `dim_index`. 565 int64_t (*dim)(void* data, int dim_index, TF_Status* status); 566 567 void (*deallocator)(void* data); 568 569 // Summarizes the value of this tensor. The caller takes ownership of the 570 // returned buffer. If `status` is not TF_OK, instead returns a null pointer. 571 // 572 // Does not include the shape and dtype of the tensor (which is generally 573 // appended later), but should include any information specific to this custom 574 // device which would be useful for debugging. 575 // 576 // Optional. If null, defaults to resolving the TFE_TensorHandle into a 577 // TF_Tensor and summarizing that. 578 TF_Buffer* (*summarize)(void* data, TF_Status* status) = nullptr; 579 } TFE_CustomDeviceTensorHandle; 580 581 // Creates a new TensorHandle from memory residing in a custom device. 
// Takes ownership of the memory pointed to by `tensor_handle_data`, and calls
// `methods.deallocator` to release it after TF no longer needs it or in case of
// an error.
// NOTE(review): no parameter is named `tensor_handle_data`; this presumably
// refers to the `data` argument -- confirm.
//
// This call is similar to `TFE_NewTensorHandleFromDeviceMemory`, but supports
// custom devices instead of physical devices and does not require blocking
// waiting for exact shapes.
TF_CAPI_EXPORT extern TFE_TensorHandle* TFE_NewCustomDeviceTensorHandle(
    TFE_Context*, const char* device_name, TF_DataType, void* data,
    TFE_CustomDeviceTensorHandle methods, TF_Status* status);

// NOTE(review): undocumented in this header. Judging by the name and
// signature, presumably writes the serialized FunctionDef registered under
// `function_name` into `buf` -- confirm against the implementation.
TF_CAPI_EXPORT extern void TFE_ContextGetFunctionDef(TFE_Context* ctx,
                                                     const char* function_name,
                                                     TF_Buffer* buf,
                                                     TF_Status* status);

// Allocate and return a new Tensor on the host.
//
// The caller must set the Tensor values by writing them to the pointer returned
// by TF_TensorData with length TF_TensorByteSize.
TF_CAPI_EXPORT extern TF_Tensor* TFE_AllocateHostTensor(TFE_Context* ctx,
                                                        TF_DataType dtype,
                                                        const int64_t* dims,
                                                        int num_dims,
                                                        TF_Status* status);

// Given a Tensor, wrap it with a TensorHandle
//
// Similar to TFE_NewTensorHandle, but includes a pointer to the TFE_Context.
// The context should be identical to that of the Tensor.
TF_CAPI_EXPORT TFE_TensorHandle* TFE_NewTensorHandleFromTensor(
    TFE_Context* ctx, TF_Tensor* t, TF_Status* status);

// Create a packed TensorHandle with the given list of TensorHandles.
// If `handles` are on the same device, assign the same device to the packed
// handle; if `handles` are on different devices, assign a CompositeDevice to
// it.
TF_CAPI_EXPORT extern TFE_TensorHandle* TFE_CreatePackedTensorHandle(
    TFE_Context* ctx, TFE_TensorHandle** handles, int* num_handles,
    TF_Status* status);

// Configure soft device placement policy for the eager executor.
// Note this policy is applied to any subsequent op executions.
TF_CAPI_EXPORT void TFE_ContextSetSoftDevicePlacement(TFE_Context* ctx,
                                                      unsigned char enable,
                                                      TF_Status* status);

// Configure device placement policy logging for the eager executor. Note this
// policy is applied to any subsequent op executions.
TF_CAPI_EXPORT void TFE_ContextSetLogDevicePlacement(TFE_Context* ctx,
                                                     unsigned char enable,
                                                     TF_Status* status);

// Returns the device type of the operation that produced `h`.
TF_CAPI_EXPORT extern const char* TFE_TensorHandleDeviceType(
    TFE_TensorHandle* h, TF_Status* status);

// Returns the device ID of the operation that produced `h`.
TF_CAPI_EXPORT extern int TFE_TensorHandleDeviceID(TFE_TensorHandle* h,
                                                   TF_Status* status);

// Get a comma-separated list of op names executed in graph functions dispatched
// to `ctx`. This feature is currently only enabled for TFRT debug builds, for
// performance and simplicity reasons.
TF_CAPI_EXPORT extern void TFE_GetExecutedOpNames(TFE_Context* ctx,
                                                  TF_Buffer* buf,
                                                  TF_Status* status);

// Set logical devices to the context's device manager.
// If logical devices are already configured at context initialization
// through TFE_ContextOptions, this method should not be called.
TF_CAPI_EXPORT extern void TFE_SetLogicalCpuDevices(TFE_Context* ctx,
                                                    int num_cpus,
                                                    const char* prefix,
                                                    TF_Status* status);

// Set configuration key and value using coordination service.
// If coordination service is enabled, the key-value will be stored on the
// leader and become accessible to all workers in the cluster.
// Currently, a config key can only be set with one value, and subsequently
// setting the same key will lead to errors.
//
// Note that the key-values are only expected to be used for cluster
// configuration data, and should not be used for storing large amounts of data
// or be accessed very frequently.
TF_CAPI_EXPORT extern void TFE_InsertConfigKeyValue(TFE_Context* ctx,
                                                    const char* key,
                                                    const char* value,
                                                    TF_Status* status);

// Get configuration key and value using coordination service.
// The config key must be set before getting its value. Getting value of
// non-existing config keys will result in errors.
TF_CAPI_EXPORT extern void TFE_GetConfigKeyValue(TFE_Context* ctx,
                                                 const char* key,
                                                 TF_Buffer* value_buf,
                                                 TF_Status* status);

// Delete configuration key-value. If `key` is a directory, recursively clean up
// all key-values under the path specified by `key`.
TF_CAPI_EXPORT extern void TFE_DeleteConfigKeyValue(TFE_Context* ctx,
                                                    const char* key,
                                                    TF_Status* status);

// Report error (specified by error_code and error_message) to other tasks in
// the cluster.
TF_CAPI_EXPORT extern void TFE_ReportErrorToCluster(TFE_Context* ctx,
                                                    int error_code,
                                                    const char* error_message,
                                                    TF_Status* status);

#ifdef __cplusplus
} /* end extern "C" */
#endif

#endif  // TENSORFLOW_C_EAGER_C_API_EXPERIMENTAL_H_